mirror of
https://github.com/WallyS02/Song-Lyrics-Generator.git
synced 2025-01-18 08:19:19 +00:00
Major updates in model + gathered some data.
This commit is contained in:
parent
307c291862
commit
fbd287ea76
9101
Data/ac_dc.csv
Normal file
9101
Data/ac_dc.csv
Normal file
File diff suppressed because it is too large
Load Diff
10181
Data/aerosmith.csv
Normal file
10181
Data/aerosmith.csv
Normal file
File diff suppressed because it is too large
Load Diff
4171
Data/alice_in_chains.csv
Normal file
4171
Data/alice_in_chains.csv
Normal file
File diff suppressed because it is too large
Load Diff
5566
Data/arctic_monkeys.csv
Normal file
5566
Data/arctic_monkeys.csv
Normal file
File diff suppressed because it is too large
Load Diff
8046
Data/depeche_mode.csv
Normal file
8046
Data/depeche_mode.csv
Normal file
File diff suppressed because it is too large
Load Diff
7650
Data/gorillaz.csv
Normal file
7650
Data/gorillaz.csv
Normal file
File diff suppressed because it is too large
Load Diff
3764
Data/jimi_hendrix.csv
Normal file
3764
Data/jimi_hendrix.csv
Normal file
File diff suppressed because it is too large
Load Diff
4894
Data/josh_homme.csv
Normal file
4894
Data/josh_homme.csv
Normal file
File diff suppressed because it is too large
Load Diff
10873
Data/kult.csv
Normal file
10873
Data/kult.csv
Normal file
File diff suppressed because it is too large
Load Diff
1375
Data/kyuss.csv
Normal file
1375
Data/kyuss.csv
Normal file
File diff suppressed because it is too large
Load Diff
3529
Data/led_zeppelin.csv
Normal file
3529
Data/led_zeppelin.csv
Normal file
File diff suppressed because it is too large
Load Diff
10874
Data/metallica.csv
Normal file
10874
Data/metallica.csv
Normal file
File diff suppressed because it is too large
Load Diff
3520
Data/queens_of_the_stone_age.csv
Normal file
3520
Data/queens_of_the_stone_age.csv
Normal file
File diff suppressed because it is too large
Load Diff
12628
Data/red_hot_chili_peppers.csv
Normal file
12628
Data/red_hot_chili_peppers.csv
Normal file
File diff suppressed because it is too large
Load Diff
6748
Data/the_cult.csv
Normal file
6748
Data/the_cult.csv
Normal file
File diff suppressed because it is too large
Load Diff
5248
Data/the_doors.csv
Normal file
5248
Data/the_doors.csv
Normal file
File diff suppressed because it is too large
Load Diff
15
links.txt
15
links.txt
@ -3,3 +3,18 @@ https://www.azlyrics.com/b/blacksabbath.html
|
|||||||
https://www.tekstowo.pl/piosenki_artysty,paktofonika.html
|
https://www.tekstowo.pl/piosenki_artysty,paktofonika.html
|
||||||
https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
|
https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
|
||||||
https://www.tekstowo.pl/piosenki_artysty,kuki.html
|
https://www.tekstowo.pl/piosenki_artysty,kuki.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,queens_of_the_stone_age.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,kyuss.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,depeche_mode.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,ac_dc.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,aerosmith.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,alice_in_chains.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,arctic_monkeys.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,the_cult.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,the_doors.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,gorillaz.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,jimi_hendrix.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,kult.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,led_zeppelin.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,metallica.html
|
||||||
|
https://www.tekstowo.pl/piosenki_artysty,red_hot_chili_peppers.html
|
9
main.py
9
main.py
@ -25,18 +25,13 @@ def generate_song(name):
|
|||||||
dataset = clean_data(os.path.join(path, name))
|
dataset = clean_data(os.path.join(path, name))
|
||||||
n_gram = int(input("Select number of words in Markov state: "))
|
n_gram = int(input("Select number of words in Markov state: "))
|
||||||
number_of_verses = int(input("Select number of verses: "))
|
number_of_verses = int(input("Select number of verses: "))
|
||||||
words_in_verses = int((int(input("Select number of words in verses: ")) - 1) / n_gram)
|
words_in_verses = int(input("Select number of words in verses: ")) - n_gram
|
||||||
# degree_of_chain = int(input("Select degree of chain: "))
|
|
||||||
model = create_markov_model(dataset, n_gram)
|
model = create_markov_model(dataset, n_gram)
|
||||||
print('\n')
|
print('\n')
|
||||||
last_state = random.choice(list(model.keys()))
|
|
||||||
rime = None
|
rime = None
|
||||||
for i in range(number_of_verses):
|
for i in range(number_of_verses):
|
||||||
generated_lyrics, last_state = generate_lyrics(model, last_state, words_in_verses, True if i == 0 else False, rime)
|
generated_lyrics, rime = generate_lyrics(model, random.choice(list(model.keys())), words_in_verses, True if i % 2 == 1 else False, rime)
|
||||||
print(generated_lyrics)
|
print(generated_lyrics)
|
||||||
rime = last_state
|
|
||||||
last_state = random.choices(list(model[last_state].keys()),
|
|
||||||
list(model[last_state].values()))[0]
|
|
||||||
|
|
||||||
|
|
||||||
def scraping():
|
def scraping():
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
|
import math
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
from nltk import SyllableTokenizer
|
from nltk import SyllableTokenizer
|
||||||
from nltk.tokenize import word_tokenize
|
from nltk.tokenize import word_tokenize
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
|
||||||
from scipy import sparse
|
|
||||||
|
|
||||||
|
|
||||||
def clean_data(name):
|
def clean_data(name):
|
||||||
@ -12,6 +11,7 @@ def clean_data(name):
|
|||||||
rows = document["Lyrics"].values.tolist()
|
rows = document["Lyrics"].values.tolist()
|
||||||
dataset = []
|
dataset = []
|
||||||
for lyric in rows:
|
for lyric in rows:
|
||||||
|
if isinstance(lyric, str):
|
||||||
lyric = lyric.lower()
|
lyric = lyric.lower()
|
||||||
lyric = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", lyric)
|
lyric = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", lyric)
|
||||||
lyric = re.sub(r"\([A-Za-z0-9:\s\.\?\,\&\*]+\)", "", lyric)
|
lyric = re.sub(r"\([A-Za-z0-9:\s\.\?\,\&\*]+\)", "", lyric)
|
||||||
@ -36,9 +36,8 @@ def create_markov_model(dataset, n_gram):
|
|||||||
current_state, next_state = "", ""
|
current_state, next_state = "", ""
|
||||||
for j in range(n_gram):
|
for j in range(n_gram):
|
||||||
current_state += dataset[i + j] + " "
|
current_state += dataset[i + j] + " "
|
||||||
next_state += dataset[i + j + n_gram] + " "
|
next_state += dataset[i + n_gram]
|
||||||
current_state = current_state[:-1]
|
current_state = current_state[:-1]
|
||||||
next_state = next_state[:-1]
|
|
||||||
if current_state not in markov_model:
|
if current_state not in markov_model:
|
||||||
markov_model[current_state] = {}
|
markov_model[current_state] = {}
|
||||||
markov_model[current_state][next_state] = 1
|
markov_model[current_state][next_state] = 1
|
||||||
@ -51,58 +50,54 @@ def create_markov_model(dataset, n_gram):
|
|||||||
total = sum(transition.values())
|
total = sum(transition.values())
|
||||||
for state, count in transition.items():
|
for state, count in transition.items():
|
||||||
markov_model[current_state][state] = count / total
|
markov_model[current_state][state] = count / total
|
||||||
"""matrix = [[0 for _ in range(len(markov_model.items()))] for _ in range(int(len(markov_model.items())))]
|
|
||||||
for current_state, transition in markov_model.items():
|
|
||||||
tempRow = list(markov_model.items())
|
|
||||||
indexRow = [idx for idx, key in enumerate(tempRow) if key[0] == current_state]
|
|
||||||
total = sum(transition.values())
|
|
||||||
for state, count in transition.items():
|
|
||||||
tempCol = list(transition.items())
|
|
||||||
indexCol = [idx for idx, key in enumerate(tempCol) if key[0] == state]
|
|
||||||
markov_model[current_state][state] = count / total
|
|
||||||
matrix[indexRow[0]][indexCol[0]] = markov_model[current_state][state]
|
|
||||||
matrix = np.array(matrix)
|
|
||||||
for i in range(n_step):
|
|
||||||
matrix = matrix.dot(matrix)
|
|
||||||
for current_state, transition in markov_model.items():
|
|
||||||
tempRow = list(markov_model.items())
|
|
||||||
indexRow = [idx for idx, key in enumerate(tempRow) if key[0] == current_state]
|
|
||||||
for state, count in transition.items():
|
|
||||||
tempCol = list(transition.items())
|
|
||||||
indexCol = [idx for idx, key in enumerate(tempCol) if key[0] == state]
|
|
||||||
markov_model[current_state][state] += matrix[indexRow[0]][indexCol[0]]"""
|
|
||||||
return markov_model
|
return markov_model
|
||||||
|
|
||||||
|
|
||||||
def generate_lyrics(markov_model, start, limit, isStartingVerse, rime):
|
def default_next_state(markov_model, current_state, lyrics):
    """Sample the next word from the model and advance the Markov state.

    Args:
        markov_model: Mapping of state string -> {next word: weight}. The
            weights are handed to ``random.choices`` as relative weights.
        current_state: Space-separated words of the current state; must be a
            key of ``markov_model``.
        lyrics: Text generated so far. It is expected to end with a single
            space and to already contain the words of ``current_state``.

    Returns:
        A ``(new_state, lyrics)`` tuple: ``lyrics`` with the sampled word and
        a trailing space appended, and ``new_state`` made of the last
        ``n_gram`` words of the extended lyrics, where ``n_gram`` is the word
        count of ``current_state``.

    Raises:
        KeyError: If ``current_state`` is not a key of ``markov_model``.
    """
    transitions = markov_model[current_state]
    next_word = random.choices(list(transitions), list(transitions.values()))[0]
    lyrics += next_word + " "

    n_gram = len(current_state.split(" "))
    # Split once instead of once per word. The trailing space yields an empty
    # final element, so the window of the last n_gram words ends just before it.
    words = lyrics.split(" ")
    new_state = " ".join(words[-(n_gram + 1):-1])
    return new_state, lyrics
|
||||||
|
|
||||||
|
|
||||||
|
def rhyming_next_state(rime_states, current_state, lyrics):
    """Sample the next word from the rhyming candidates and advance the state.

    Args:
        rime_states: Non-empty mapping of candidate word -> weight; the
            weights are handed to ``random.choices`` as relative weights.
        current_state: Space-separated words of the current state. Only its
            word count is used here.
        lyrics: Text generated so far. It is expected to end with a single
            space and to already contain the words of ``current_state``.

    Returns:
        A ``(new_state, lyrics)`` tuple: ``lyrics`` with the sampled word and
        a trailing space appended, and ``new_state`` made of the last
        ``n_gram`` words of the extended lyrics, where ``n_gram`` is the word
        count of ``current_state``.
    """
    next_word = random.choices(list(rime_states), list(rime_states.values()))[0]
    lyrics += next_word + " "

    n_gram = len(current_state.split(" "))
    # Split once instead of once per word. The trailing space yields an empty
    # final element, so the window of the last n_gram words ends just before it.
    words = lyrics.split(" ")
    new_state = " ".join(words[-(n_gram + 1):-1])
    return new_state, lyrics
|
||||||
|
|
||||||
|
|
||||||
|
def generate_lyrics(markov_model, start, limit, try_rhyme, rime):
    """Generate one verse by taking ``limit`` steps through the Markov model.

    Args:
        markov_model: Mapping of state string -> {next word: weight}.
        start: Initial state; it opens the verse with its first character
            upper-cased.
        limit: Number of words to append after ``start``.
        try_rhyme: When truthy, the last appended word is chosen, if possible,
            among the successors whose final syllable equals the final
            syllable of the last word of ``rime``.
        rime: State string to rhyme with, or ``None``. When it is ``None`` or
            empty no rhyme is attempted.

    Returns:
        A ``(lyrics, current_state)`` tuple: the generated verse (ending with
        a space) and the state reached after the last step.
    """
    current_state = start
    lyrics = current_state + " "
    lyrics = lyrics[0].upper() + lyrics[1:]

    for step in range(limit):
        rime_states = {}
        # Rhyming only applies to the final word of the verse. The extra
        # ``rime`` check avoids calling ``.split`` on ``None`` when no
        # previous verse is available yet.
        if step == limit - 1 and try_rhyme and rime:
            rime_word = rime.split(" ")[-1]
            tk = SyllableTokenizer()
            rime_syllab = tk.tokenize(rime_word)[-1]
            # Keep successors sharing the last syllable, excluding the rhyme
            # word itself so a verse does not "rhyme" by repeating it.
            rime_states = {
                state: probability
                for state, probability in markov_model[current_state].items()
                if tk.tokenize(state)[-1] == rime_syllab and state != rime_word
            }

        if rime_states:
            current_state, lyrics = rhyming_next_state(rime_states, current_state, lyrics)
        else:
            # No rhyme requested, or no rhyming successor: ordinary step.
            current_state, lyrics = default_next_state(markov_model, current_state, lyrics)

    return lyrics, current_state
|
||||||
|
Loading…
x
Reference in New Issue
Block a user