Major updates in model + gathered some data.

This commit is contained in:
Sebastian Kutny 2023-04-04 18:01:11 +02:00
parent 307c291862
commit fbd287ea76
19 changed files with 108234 additions and 61 deletions

9101
Data/ac_dc.csv Normal file

File diff suppressed because it is too large Load Diff

10181
Data/aerosmith.csv Normal file

File diff suppressed because it is too large Load Diff

4171
Data/alice_in_chains.csv Normal file

File diff suppressed because it is too large Load Diff

5566
Data/arctic_monkeys.csv Normal file

File diff suppressed because it is too large Load Diff

8046
Data/depeche_mode.csv Normal file

File diff suppressed because it is too large Load Diff

7650
Data/gorillaz.csv Normal file

File diff suppressed because it is too large Load Diff

3764
Data/jimi_hendrix.csv Normal file

File diff suppressed because it is too large Load Diff

4894
Data/josh_homme.csv Normal file

File diff suppressed because it is too large Load Diff

10873
Data/kult.csv Normal file

File diff suppressed because it is too large Load Diff

1375
Data/kyuss.csv Normal file

File diff suppressed because it is too large Load Diff

3529
Data/led_zeppelin.csv Normal file

File diff suppressed because it is too large Load Diff

10874
Data/metallica.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

12628
Data/red_hot_chili_peppers.csv Normal file

File diff suppressed because it is too large Load Diff

6748
Data/the_cult.csv Normal file

File diff suppressed because it is too large Load Diff

5248
Data/the_doors.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,3 +3,18 @@ https://www.azlyrics.com/b/blacksabbath.html
https://www.tekstowo.pl/piosenki_artysty,paktofonika.html https://www.tekstowo.pl/piosenki_artysty,paktofonika.html
https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
https://www.tekstowo.pl/piosenki_artysty,kuki.html https://www.tekstowo.pl/piosenki_artysty,kuki.html
https://www.tekstowo.pl/piosenki_artysty,queens_of_the_stone_age.html
https://www.tekstowo.pl/piosenki_artysty,kyuss.html
https://www.tekstowo.pl/piosenki_artysty,depeche_mode.html
https://www.tekstowo.pl/piosenki_artysty,ac_dc.html
https://www.tekstowo.pl/piosenki_artysty,aerosmith.html
https://www.tekstowo.pl/piosenki_artysty,alice_in_chains.html
https://www.tekstowo.pl/piosenki_artysty,arctic_monkeys.html
https://www.tekstowo.pl/piosenki_artysty,the_cult.html
https://www.tekstowo.pl/piosenki_artysty,the_doors.html
https://www.tekstowo.pl/piosenki_artysty,gorillaz.html
https://www.tekstowo.pl/piosenki_artysty,jimi_hendrix.html
https://www.tekstowo.pl/piosenki_artysty,kult.html
https://www.tekstowo.pl/piosenki_artysty,led_zeppelin.html
https://www.tekstowo.pl/piosenki_artysty,metallica.html
https://www.tekstowo.pl/piosenki_artysty,red_hot_chili_peppers.html

View File

@ -25,18 +25,13 @@ def generate_song(name):
dataset = clean_data(os.path.join(path, name)) dataset = clean_data(os.path.join(path, name))
n_gram = int(input("Select number of words in Markov state: ")) n_gram = int(input("Select number of words in Markov state: "))
number_of_verses = int(input("Select number of verses: ")) number_of_verses = int(input("Select number of verses: "))
words_in_verses = int((int(input("Select number of words in verses: ")) - 1) / n_gram) words_in_verses = int(input("Select number of words in verses: ")) - n_gram
# degree_of_chain = int(input("Select degree of chain: "))
model = create_markov_model(dataset, n_gram) model = create_markov_model(dataset, n_gram)
print('\n') print('\n')
last_state = random.choice(list(model.keys()))
rime = None rime = None
for i in range(number_of_verses): for i in range(number_of_verses):
generated_lyrics, last_state = generate_lyrics(model, last_state, words_in_verses, True if i == 0 else False, rime) generated_lyrics, rime = generate_lyrics(model, random.choice(list(model.keys())), words_in_verses, True if i % 2 == 1 else False, rime)
print(generated_lyrics) print(generated_lyrics)
rime = last_state
last_state = random.choices(list(model[last_state].keys()),
list(model[last_state].values()))[0]
def scraping(): def scraping():

View File

@ -1,10 +1,9 @@
import math
import random import random
import re import re
from nltk import SyllableTokenizer from nltk import SyllableTokenizer
from nltk.tokenize import word_tokenize from nltk.tokenize import word_tokenize
import pandas as pd import pandas as pd
import numpy as np
from scipy import sparse
def clean_data(name): def clean_data(name):
@ -12,6 +11,7 @@ def clean_data(name):
rows = document["Lyrics"].values.tolist() rows = document["Lyrics"].values.tolist()
dataset = [] dataset = []
for lyric in rows: for lyric in rows:
if isinstance(lyric, str):
lyric = lyric.lower() lyric = lyric.lower()
lyric = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", lyric) lyric = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", lyric)
lyric = re.sub(r"\([A-Za-z0-9:\s\.\?\,\&\*]+\)", "", lyric) lyric = re.sub(r"\([A-Za-z0-9:\s\.\?\,\&\*]+\)", "", lyric)
@ -36,9 +36,8 @@ def create_markov_model(dataset, n_gram):
current_state, next_state = "", "" current_state, next_state = "", ""
for j in range(n_gram): for j in range(n_gram):
current_state += dataset[i + j] + " " current_state += dataset[i + j] + " "
next_state += dataset[i + j + n_gram] + " " next_state += dataset[i + n_gram]
current_state = current_state[:-1] current_state = current_state[:-1]
next_state = next_state[:-1]
if current_state not in markov_model: if current_state not in markov_model:
markov_model[current_state] = {} markov_model[current_state] = {}
markov_model[current_state][next_state] = 1 markov_model[current_state][next_state] = 1
@ -51,58 +50,54 @@ def create_markov_model(dataset, n_gram):
total = sum(transition.values()) total = sum(transition.values())
for state, count in transition.items(): for state, count in transition.items():
markov_model[current_state][state] = count / total markov_model[current_state][state] = count / total
"""matrix = [[0 for _ in range(len(markov_model.items()))] for _ in range(int(len(markov_model.items())))]
for current_state, transition in markov_model.items():
tempRow = list(markov_model.items())
indexRow = [idx for idx, key in enumerate(tempRow) if key[0] == current_state]
total = sum(transition.values())
for state, count in transition.items():
tempCol = list(transition.items())
indexCol = [idx for idx, key in enumerate(tempCol) if key[0] == state]
markov_model[current_state][state] = count / total
matrix[indexRow[0]][indexCol[0]] = markov_model[current_state][state]
matrix = np.array(matrix)
for i in range(n_step):
matrix = matrix.dot(matrix)
for current_state, transition in markov_model.items():
tempRow = list(markov_model.items())
indexRow = [idx for idx, key in enumerate(tempRow) if key[0] == current_state]
for state, count in transition.items():
tempCol = list(transition.items())
indexCol = [idx for idx, key in enumerate(tempCol) if key[0] == state]
markov_model[current_state][state] += matrix[indexRow[0]][indexCol[0]]"""
return markov_model return markov_model
def generate_lyrics(markov_model, start, limit, isStartingVerse, rime): def default_next_state(markov_model, current_state, lyrics):
next_state = random.choices(list(markov_model[current_state].keys()),
list(markov_model[current_state].values()))
lyrics += next_state[0] + " "
n_gram = len(current_state.split(" "))
current_state = ""
for i in range(n_gram + 1, 1, -1):
current_state += lyrics.split(" ")[-i] + " "
current_state = current_state[:-1]
return current_state, lyrics
def rhyming_next_state(rime_states, current_state, lyrics):
    """Append a rhyming word to ``lyrics`` and return the shifted Markov state.

    Args:
        rime_states: Mapping of candidate next word -> weight. One key is
            drawn with ``random.choices`` using the values as weights.
            Must be non-empty (``random.choices`` fails on an empty
            population).
        current_state: The current state, a string of space-separated
            words; only its word count (the n-gram size) is used here.
        lyrics: The text generated so far. The drawn word and a single
            trailing space are appended to it.

    Returns:
        A ``(new_state, lyrics)`` tuple, where ``new_state`` is the last
        ``n_gram`` space-separated words of the updated ``lyrics`` joined
        by single spaces.
    """
    chosen = random.choices(list(rime_states.keys()),
                            list(rime_states.values()))[0]
    lyrics += chosen + " "

    n_gram = len(current_state.split(" "))

    # Split once instead of once per word. Because the text now ends with
    # a space, the final element of the split is an empty string; skip it
    # and take the n_gram words right before it.
    words = lyrics.split(" ")
    new_state = " ".join(words[-(n_gram + 1):-1])
    return new_state, lyrics
def generate_lyrics(markov_model, start, limit, try_rhyme, rime):
n = 0 n = 0
current_state = start current_state = start
lyrics = "" lyrics = ""
lyrics += current_state + " " lyrics += current_state + " "
lyrics = lyrics[0].upper() + lyrics[1:] lyrics = lyrics[0].upper() + lyrics[1:]
while n < limit: while n < limit:
if n == limit - 1 and not isStartingVerse: if n == limit - 1 and try_rhyme is True:
rime = rime.split(" ")[-1] rime = rime.split(" ")[-1]
tk = SyllableTokenizer() tk = SyllableTokenizer()
rime_syllab = tk.tokenize(rime)[-1] rime_syllab = tk.tokenize(rime)[-1]
rime_states = {} rime_states = {}
for state, probability in markov_model[current_state].items(): for state, probability in markov_model[current_state].items():
word = state.split(" ")[-1] syllab = tk.tokenize(state)[-1]
syllab = tk.tokenize(word)[-1] if rime_syllab == syllab and rime != state:
if rime_syllab == syllab and rime != word:
rime_states.update({state: probability}) rime_states.update({state: probability})
if rime_states: if rime_states:
next_state = random.choices(list(rime_states.keys()), current_state, lyrics = rhyming_next_state(rime_states, current_state, lyrics)
list(rime_states.values()))
current_state = next_state[0]
else: else:
next_state = random.choices(list(markov_model[current_state].keys()), current_state, lyrics = default_next_state(markov_model, current_state, lyrics)
list(markov_model[current_state].values()))
current_state = next_state[0]
else: else:
next_state = random.choices(list(markov_model[current_state].keys()), current_state, lyrics = default_next_state(markov_model, current_state, lyrics)
list(markov_model[current_state].values()))
current_state = next_state[0]
lyrics += current_state + " "
n += 1 n += 1
return lyrics, current_state return lyrics, current_state