mirror of
https://github.com/WallyS02/Song-Lyrics-Generator.git
synced 2025-01-18 08:19:19 +00:00
Interface improvements.
This commit is contained in:
parent
a2c1694adb
commit
afeb9d579b
79
main.py
79
main.py
@ -1,16 +1,19 @@
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
from scrapper import scrap_data
|
from scrapper import scrap_data
|
||||||
from markov_model import clean_data
|
from markov_model import clean_data
|
||||||
from markov_model import create_markov_model
|
from markov_model import create_markov_model
|
||||||
from markov_model import generate_lyrics
|
from markov_model import generate_lyrics
|
||||||
|
|
||||||
black_sabbath_selected_albums = ["Black Sabbath", "Paranoid", "Master Of Reality", "Vol 4", "Sabbath Bloody Sabbath",
|
blacksabbath_selected_albums = ["Black Sabbath", "Paranoid", "Master Of Reality", "Vol 4", "Sabbath Bloody Sabbath",
|
||||||
"Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
|
"Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
|
||||||
"Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
|
"Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
|
||||||
"Dehumanizer", "Cross Purposes", "Forbidden", "13"]
|
"Dehumanizer", "Cross Purposes", "Forbidden", "13"]
|
||||||
|
|
||||||
pink_floyd_selected_albums = ["The Piper At The Gates Of Dawn", "A Saucerful Of Secrets", "Meddle", "More", "Ummagumma",
|
pinkfloyd_selected_albums = ["The Piper At The Gates Of Dawn", "A Saucerful Of Secrets", "Meddle", "More", "Ummagumma",
|
||||||
"Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
|
"Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
|
||||||
"Wish You Were Here", "Animals", "The Wall", "The Final Cut",
|
"Wish You Were Here", "Animals", "The Wall", "The Final Cut",
|
||||||
"A Momentary Lapse Of Reason", "The Division Bell"]
|
"A Momentary Lapse Of Reason", "The Division Bell"]
|
||||||
@ -24,36 +27,74 @@ def generate_song(name):
|
|||||||
dataset = clean_data(os.path.join(path, name))
|
dataset = clean_data(os.path.join(path, name))
|
||||||
n_gram = int(input("Select number of words in Markov state: "))
|
n_gram = int(input("Select number of words in Markov state: "))
|
||||||
number_of_verses = int(input("Select number of verses: "))
|
number_of_verses = int(input("Select number of verses: "))
|
||||||
words_in_verses = int(int(input("Select number of words in verses: ")) / n_gram)
|
words_in_verses = int((int(input("Select number of words in verses: ")) - 1) / n_gram)
|
||||||
model = create_markov_model(dataset, n_gram)
|
degree_of_chain = int(input("Select degree of chain: "))
|
||||||
|
model = create_markov_model(dataset, n_gram, degree_of_chain)
|
||||||
print('\n')
|
print('\n')
|
||||||
|
last_state = random.choice(list(model.keys()))
|
||||||
for i in range(number_of_verses):
|
for i in range(number_of_verses):
|
||||||
generated_lyrics = generate_lyrics(model, random.choice(list(model.keys())), words_in_verses)
|
generated_lyrics, last_state = generate_lyrics(model, last_state, words_in_verses)
|
||||||
print(generated_lyrics)
|
print(generated_lyrics)
|
||||||
|
last_state = random.choices(list(model[last_state].keys()),
|
||||||
|
list(model[last_state].values()))[0]
|
||||||
|
|
||||||
|
|
||||||
|
def scraping():
    """Prompt for one URL listed in ``links.txt`` and scrape it.

    Every non-blank line of ``links.txt`` is printed with its index, the user
    picks one by number, and the URL is dispatched on its host:

    * ``www.azlyrics.com`` -- the fifth ``/``-separated URL component, minus
      its last five characters, names a module-level list
      ``<bandname>_selected_albums``; that list and the module-level
      ``time_stamp`` are passed to ``scrap_data``. If no such global exists,
      a hint on how to define it is printed instead.
    * ``www.tekstowo.pl`` -- ``scrap_data`` is called with an empty album
      list and a ``0.0`` time stamp.

    URLs on any other host are silently ignored. Invalid selections are
    reported and abort the command instead of raising. Returns ``None``.
    """
    with open("links.txt", "r") as f:
        # strip() removes the line terminator from every entry, so the last
        # line needs no special-casing whether or not the file ends with a
        # newline; blank lines are dropped so they cannot be selected.
        urls = [line.strip() for line in f if line.strip()]
    if not urls:
        print("No urls found in links.txt!")
        return
    for index, url in enumerate(urls):
        print(str(index) + ".", url)

    try:
        line_index = int(input("Select url to scrap: "))
    except ValueError:
        print("Selection must be a number!")
        return
    if not 0 <= line_index < len(urls):
        print("No such url on the list!")
        return
    url = urls[line_index]

    # For "scheme://host/..." the host is the third '/'-separated component.
    parts = url.split('/')
    host = parts[2] if len(parts) > 2 else ""
    if host == 'www.azlyrics.com':
        # Drops the last five characters of the page name (presumably the
        # ".html" suffix -- TODO confirm), mirroring how scrap_data derives
        # its output file name.
        band_name = parts[4][:-5] if len(parts) > 4 else ""
        selected_albums_name = band_name + "_selected_albums"
        if selected_albums_name not in globals():
            print("Define selected albums in global list variable in format: bandname_selected_albums")
            return
        scrap_data(url, globals()[selected_albums_name], time_stamp)
    elif host == 'www.tekstowo.pl':
        scrap_data(url, [], 0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_band_csv(prompt):
    """Ask for a file name with *prompt* and load it from the data directory.

    Returns the loaded ``DataFrame``, or ``None`` (after printing a message)
    when the name does not refer to an existing file under ``path``.
    """
    name = input(prompt)
    # os.path.join works whether or not ``path`` ends with a separator and
    # matches how generate_song locates its data file.
    file_path = os.path.join(path, name)
    # isfile (not exists) so an empty name or a directory is rejected here
    # rather than failing inside read_csv.
    if not os.path.isfile(file_path):
        print("No such file in directory!")
        return None
    return pd.read_csv(file_path)


def merging():
    """Concatenate two band CSV files into a new CSV file.

    Prompts for two input file names and one output file name, all resolved
    against the module-level ``path``. The rows of the second file are
    appended to those of the first with a fresh index, and the result is
    written with ``DataFrame.to_csv``. If either input file is missing, a
    message is printed and nothing is written. Returns ``None``.
    """
    df1 = _read_band_csv("Select first band file: ")
    if df1 is None:
        return
    df2 = _read_band_csv("Select second band file: ")
    if df2 is None:
        return
    merged = pd.concat([df1, df2], ignore_index=True)
    result_name = input("Select name of result file: ")
    merged.to_csv(os.path.join(path, result_name))
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
print("Select data set to use in generation or other option:\n1. Pink Floyd lyrics generation\n2. Black Sabbath "
|
print("Select data set to use in generation or other option:\n1. Generate text based on input filename\n2. Scrap "
|
||||||
"lyrics generation\n3. Bracia Figo Fagot\n4. Paktofonika\n5. Fused English (aka Pink Sabbath) lyrics "
|
"data\n3. Merge CSV band's songs\n4. Exit")
|
||||||
"generation\n6. Fused Polish (aka Braciofonika Pigo Pagot)\n7. Scrap data\n8. Exit")
|
|
||||||
while True:
|
while True:
|
||||||
selection = int(input())
|
selection = int(input())
|
||||||
match selection:
|
match selection:
|
||||||
case 1:
|
case 1:
|
||||||
generate_song("Pink Floyd.csv")
|
name = input("Select name of data file: ")
|
||||||
|
generate_song(name)
|
||||||
case 2:
|
case 2:
|
||||||
generate_song("Black Sabbath.csv")
|
scraping()
|
||||||
case 3:
|
case 3:
|
||||||
generate_song("Bracia Figo Fagot.csv")
|
merging()
|
||||||
case 4:
|
case 4:
|
||||||
generate_song("Paktofonika.csv")
|
|
||||||
case 5:
|
|
||||||
generate_song("Pink Sabbath.csv")
|
|
||||||
case 6:
|
|
||||||
generate_song("Braciofonika Pigo Pagot.csv")
|
|
||||||
case 7:
|
|
||||||
scrap_data(pink_floyd_selected_albums, black_sabbath_selected_albums, time_stamp)
|
|
||||||
case 8:
|
|
||||||
break
|
break
|
||||||
print("\nCommand executed")
|
print("\nCommand executed")
|
||||||
|
|
||||||
|
@ -2,6 +2,8 @@ import random
|
|||||||
import re
|
import re
|
||||||
from nltk.tokenize import word_tokenize
|
from nltk.tokenize import word_tokenize
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from scipy import sparse
|
||||||
|
|
||||||
|
|
||||||
def clean_data(name):
|
def clean_data(name):
|
||||||
@ -27,9 +29,9 @@ def clean_data(name):
|
|||||||
return dataset
|
return dataset
|
||||||
|
|
||||||
|
|
||||||
def create_markov_model(dataset, n_gram):
|
def create_markov_model(dataset, n_gram, n_step):
|
||||||
markov_model = {}
|
markov_model = {}
|
||||||
for i in range(len(dataset) - n_gram - 1):
|
for i in range(len(dataset) - 1 - 2 * n_gram):
|
||||||
current_state, next_state = "", ""
|
current_state, next_state = "", ""
|
||||||
for j in range(n_gram):
|
for j in range(n_gram):
|
||||||
current_state += dataset[i + j] + " "
|
current_state += dataset[i + j] + " "
|
||||||
@ -48,6 +50,26 @@ def create_markov_model(dataset, n_gram):
|
|||||||
total = sum(transition.values())
|
total = sum(transition.values())
|
||||||
for state, count in transition.items():
|
for state, count in transition.items():
|
||||||
markov_model[current_state][state] = count / total
|
markov_model[current_state][state] = count / total
|
||||||
|
"""matrix = [[0 for _ in range(len(markov_model.items()))] for _ in range(int(len(markov_model.items())))]
|
||||||
|
for current_state, transition in markov_model.items():
|
||||||
|
tempRow = list(markov_model.items())
|
||||||
|
indexRow = [idx for idx, key in enumerate(tempRow) if key[0] == current_state]
|
||||||
|
total = sum(transition.values())
|
||||||
|
for state, count in transition.items():
|
||||||
|
tempCol = list(transition.items())
|
||||||
|
indexCol = [idx for idx, key in enumerate(tempCol) if key[0] == state]
|
||||||
|
markov_model[current_state][state] = count / total
|
||||||
|
matrix[indexRow[0]][indexCol[0]] = markov_model[current_state][state]
|
||||||
|
matrix = np.array(matrix)
|
||||||
|
for i in range(n_step):
|
||||||
|
matrix = matrix.dot(matrix)
|
||||||
|
for current_state, transition in markov_model.items():
|
||||||
|
tempRow = list(markov_model.items())
|
||||||
|
indexRow = [idx for idx, key in enumerate(tempRow) if key[0] == current_state]
|
||||||
|
for state, count in transition.items():
|
||||||
|
tempCol = list(transition.items())
|
||||||
|
indexCol = [idx for idx, key in enumerate(tempCol) if key[0] == state]
|
||||||
|
markov_model[current_state][state] += matrix[indexRow[0]][indexCol[0]]"""
|
||||||
return markov_model
|
return markov_model
|
||||||
|
|
||||||
|
|
||||||
@ -63,4 +85,4 @@ def generate_lyrics(markov_model, start, limit):
|
|||||||
current_state = next_state[0]
|
current_state = next_state[0]
|
||||||
lyrics += current_state + " "
|
lyrics += current_state + " "
|
||||||
n += 1
|
n += 1
|
||||||
return lyrics
|
return lyrics, current_state
|
||||||
|
25
scrapper.py
25
scrapper.py
@ -6,6 +6,7 @@ import os
|
|||||||
import time
|
import time
|
||||||
from ScrapThread import ScrapThread
|
from ScrapThread import ScrapThread
|
||||||
from proxy_handling import proxies_validation
|
from proxy_handling import proxies_validation
|
||||||
|
from main import path
|
||||||
|
|
||||||
|
|
||||||
def connect(url, proxies_list):
|
def connect(url, proxies_list):
|
||||||
@ -136,21 +137,13 @@ def do_threading(url, selected_albums, time_stamp, proxies_list):
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
def scrap_data(pink_floyd_selected_albums, black_sabbath_selected_albums, time_stamp):
|
def scrap_data(url, selected_albums, time_stamp):
|
||||||
proxies_list = proxies_validation()
|
proxies_list = proxies_validation()
|
||||||
file = open("links.txt")
|
df = do_threading(url, selected_albums, time_stamp, proxies_list)
|
||||||
path = os.path.dirname(os.path.abspath(__file__))
|
if url.split('/')[2] == 'www.azlyrics.com':
|
||||||
path = os.path.join(path, "Data")
|
filename = url.split('/')[4][:-5]
|
||||||
pink_floyd_data_frame = do_threading(file.readline()[0:-1], pink_floyd_selected_albums, time_stamp, proxies_list)
|
df.to_csv((path + filename))
|
||||||
black_sabbath_data_frame = do_threading(file.readline(), black_sabbath_selected_albums, time_stamp, proxies_list)
|
if url.split('/')[2] == 'www.tekstowo.pl':
|
||||||
pink_sabbath_data_frame = pd.concat([pink_floyd_data_frame, black_sabbath_data_frame], ignore_index=True)
|
filename = url.split(',')[1][:-5]
|
||||||
pink_floyd_data_frame.to_csv((path + "PinkFloyd.csv"))
|
df.to_csv((path + filename))
|
||||||
black_sabbath_data_frame.to_csv((path + "BlackSabbath.csv"))
|
|
||||||
pink_sabbath_data_frame.to_csv((path + "PinkSabbath.csv"))
|
|
||||||
paktofonika = do_threading(file.readline()[0:-1], [], 0.0, proxies_list)
|
|
||||||
figofagot = do_threading(file.readline(), [], 0.0, proxies_list)
|
|
||||||
braciofonika_pigo_pagot = pd.concat([paktofonika, figofagot], ignore_index=True)
|
|
||||||
paktofonika.to_csv((path + "Paktofonika.csv"))
|
|
||||||
figofagot.to_csv((path + "Bracia Figo Fagot.csv"))
|
|
||||||
braciofonika_pigo_pagot.to_csv((path + "Braciofonika Pigo Pagot.csv"))
|
|
||||||
os.remove("valid_proxy_list")
|
os.remove("valid_proxy_list")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user