2023-03-26 13:22:02 +00:00
|
|
|
import os
|
|
|
|
import random
|
2023-03-28 13:08:23 +00:00
|
|
|
import pandas as pd
|
2023-03-26 13:22:02 +00:00
|
|
|
from scrapper import scrap_data
|
2023-04-23 14:17:58 +00:00
|
|
|
from markov_model import clean_data, create_markov_model, generate_lyrics, self_BLEU, zipfs_law, plot_heaps_laws, cross_entropy, perplexity
|
2023-04-18 13:03:00 +00:00
|
|
|
import json
|
2023-03-26 13:22:02 +00:00
|
|
|
|
2023-03-28 13:08:23 +00:00
|
|
|
# Albums to scrape for Black Sabbath. scraping() finds this list by name in
# globals(), using the pattern "<band>_selected_albums", so the variable name
# must not change.
blacksabbath_selected_albums = [
    "Black Sabbath",
    "Paranoid",
    "Master Of Reality",
    "Vol 4",
    "Sabbath Bloody Sabbath",
    "Sabotage",
    "Technical Ecstasy",
    "Never Say Die!",
    "Heaven And Hell",
    "Mob Rules",
    "Born Again",
    "Seventh Star",
    "The Eternal Idol",
    "Headless Cross",
    "Tyr",
    "Dehumanizer",
    "Cross Purposes",
    "Forbidden",
    "13",
]
|
2023-03-26 13:22:02 +00:00
|
|
|
|
2023-03-28 13:08:23 +00:00
|
|
|
# Albums to scrape for Pink Floyd. scraping() finds this list by name in
# globals(), using the pattern "<band>_selected_albums", so the variable name
# must not change.
pinkfloyd_selected_albums = [
    "The Piper At The Gates Of Dawn",
    "A Saucerful Of Secrets",
    "Meddle",
    "More",
    "Ummagumma",
    "Atom Heart Mother",
    "Obscured By Clouds",
    "The Dark Side Of The Moon",
    "Wish You Were Here",
    "Animals",
    "The Wall",
    "The Final Cut",
    "A Momentary Lapse Of Reason",
    "The Division Bell",
]
|
2023-03-26 13:22:02 +00:00
|
|
|
|
|
|
|
# Third argument passed to scrap_data() for azlyrics URLs.
# NOTE(review): presumably a delay in seconds between requests - confirm
# against scrapper.scrap_data.
time_stamp = 3.5

# Directory containing this script; data and model folders live next to it.
path = os.path.dirname(os.path.abspath(__file__))
# Folder holding the CSV data files offered by create_model() and merging().
pathData = os.path.join(path, "Data")
# Folder holding the saved JSON models offered by generate_song().
pathModels = os.path.join(path, "Models")
|
2023-03-27 22:21:13 +00:00
|
|
|
|
|
|
|
|
2023-04-21 22:12:26 +00:00
|
|
|
def print_file_list(filepath):
    """Print a numbered list of the regular files in *filepath* and return it.

    Sub-directories are skipped. The printed index of each entry is its
    position in the returned list, so callers can use the number typed by the
    user to index straight into the result.

    Args:
        filepath: Directory to list.

    Returns:
        List of file names (not full paths) in the order reported by
        ``os.listdir``.
    """
    filelist = [
        entry
        for entry in os.listdir(filepath)
        if os.path.isfile(os.path.join(filepath, entry))
    ]
    for index, entry in enumerate(filelist):
        print(index, ": ", entry)
    return filelist
|
|
|
|
|
|
|
|
|
|
|
|
def create_model():
    """Interactively build a Markov model from a data file and save it as JSON.

    Lists the files in ``pathData``, asks the user to pick one by index, cleans
    it with ``clean_data``, builds a model with ``create_markov_model`` using
    the requested number of words per state, and writes the result to
    ``<pathModels>/<model name>.json``.

    Raises:
        ValueError: If a numeric prompt receives non-numeric input.
        IndexError: If the selected file index is out of range.
    """
    filelist = print_file_list(pathData)
    name = filelist[int(input("Select datafile: "))]
    dataset = clean_data(os.path.join(pathData, name))

    n_gram = int(input("Select number of words in Markov state: "))
    model = create_markov_model(dataset, n_gram)

    model_name = input("Select model name: ")
    # The Models folder may not exist yet on a fresh checkout; without this
    # the open() below fails with FileNotFoundError after the model was built.
    os.makedirs(pathModels, exist_ok=True)
    with open(os.path.join(pathModels, model_name) + '.json', 'w') as model_file:
        json.dump(model, model_file)
|
|
|
|
|
|
|
|
|
|
|
|
def generate_song():
    """Interactively generate and print song verses from a saved model.

    Lists the files in ``pathModels``, loads the chosen JSON model, asks for
    the number of verses and words per verse, then calls ``generate_lyrics``
    once per verse starting from a randomly chosen state. Each verse is
    printed as it is produced.

    Returns:
        List of all generated words, lower-cased, in order of generation.
        Empty if the selected model contains no states.

    Raises:
        ValueError: If a numeric prompt receives non-numeric input.
        IndexError: If the selected model index is out of range.
    """
    filelist = print_file_list(pathModels)
    model_name = filelist[int(input("Select model: "))]
    with open(os.path.join(pathModels, model_name), 'r') as model_file:
        model = json.load(model_file)

    # Built once: the model is not modified while verses are generated.
    states = list(model)
    if not states:
        # An empty model has no state to start from or to measure.
        print("Selected model is empty")
        return []

    number_of_verses = int(input("Select number of verses: "))
    # The requested length is reduced by the number of words in one state
    # (NOTE(review): presumably because the starting state already supplies
    # those words - confirm against markov_model.generate_lyrics).
    words_in_state = len(states[0].split(' '))
    words_in_verses = int(input("Select number of words in verses: ")) - words_in_state
    print('\n')

    rime = None
    song = []
    for i in range(number_of_verses):
        # The flag is True on every second verse (odd i); the rime returned by
        # one call is passed into the next.
        generated_lyrics, rime = generate_lyrics(
            model, random.choice(states), words_in_verses, i % 2 == 1, rime
        )
        print(generated_lyrics)
        song.extend(word.lower() for word in generated_lyrics.split())
    return song
|
2023-03-28 13:08:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
def scraping():
    """Interactively pick a URL from ``links.txt`` and scrape it.

    ``links.txt`` is read from the current working directory, one URL per
    line. For ``www.azlyrics.com`` URLs the album list is looked up in the
    module globals under ``<page name without .html>_selected_albums`` and
    passed to ``scrap_data`` together with ``time_stamp``. For
    ``www.tekstowo.pl`` URLs ``scrap_data`` is called with an empty album list
    and ``0.0``.

    Raises:
        ValueError: If the selection is not a number.
        IndexError: If the selected index is out of range.
    """
    with open("links.txt", "r") as f:
        # Strip line endings from every line (including the last one, which
        # may or may not end with a newline) and ignore blank lines, so the
        # selected URL never carries trailing whitespace.
        links = [line.strip() for line in f if line.strip()]

    if not links:
        print("No links found in links.txt")
        return

    for index, link in enumerate(links):
        print(str(index) + ".", link)

    url = links[int(input("Select url to scrap: "))]
    # "scheme://host/a/page.html" -> ['scheme:', '', 'host', 'a', 'page.html']
    parts = url.split('/')
    host = parts[2] if len(parts) > 2 else ''

    if host == 'www.azlyrics.com':
        page = parts[4] if len(parts) > 4 else ''
        selected_albums_name = page.removesuffix('.html') + "_selected_albums"
        if selected_albums_name not in globals():
            print("Define selected albums in global list variable in format: bandname_selected_albums")
            return
        scrap_data(url, globals()[selected_albums_name], time_stamp)
    elif host == 'www.tekstowo.pl':
        scrap_data(url, [], 0.0)
    else:
        print("Unsupported site:", host)
|
|
|
|
|
|
|
|
|
|
|
|
def merging():
    """Interactively merge CSV files from ``pathData`` into one CSV file.

    Repeatedly shows the files not yet chosen plus a final "That's all"
    entry. Each chosen file is read with ``pandas.read_csv`` and removed from
    the menu. When the user picks "That's all", the chosen frames are
    concatenated (after an empty frame with ``Title`` and ``Lyrics`` columns)
    and written to ``<pathData>/<result name>``.
    """
    print("Select files to merge: ")
    remaining = [
        entry
        for entry in os.listdir(pathData)
        if os.path.isfile(os.path.join(pathData, entry))
    ]

    # The empty seed frame keeps 'Title' and 'Lyrics' as the leading columns
    # and gives a well-formed result even when no file is selected.
    frames = [pd.DataFrame(columns=['Title', 'Lyrics'])]

    while True:
        for index, entry in enumerate(remaining):
            print(index, ": ", entry)
        done_option = len(remaining)
        print(done_option, ": That's all")

        try:
            option = int(input("Select option: "))
        except ValueError:
            print("Invalid option, enter one of the listed numbers")
            continue

        if option == done_option:
            break
        if not 0 <= option < done_option:
            print("Invalid option, enter one of the listed numbers")
            continue

        frames.append(pd.read_csv(os.path.join(pathData, remaining[option])))
        # A merged file is removed from the menu so it cannot be added twice.
        remaining.pop(option)

    result_name = input("Select name of result file: ")
    # Concatenate once at the end instead of re-copying the growing frame on
    # every selection.
    merged = pd.concat(frames, ignore_index=True)
    # NOTE(review): to_csv writes the row index as an extra first column by
    # default; kept as in the original - confirm whether clean_data expects it.
    merged.to_csv(os.path.join(pathData, result_name))
|
2023-03-26 13:22:02 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
|
2023-04-21 22:12:26 +00:00
|
|
|
print("Select option:\n1. Create model based on datafile\n2. Generate lyrics with model\n3. Scrap "
|
2023-04-18 13:03:00 +00:00
|
|
|
"data\n4. Merge CSV band's songs\n5. Exit")
|
2023-03-26 13:22:02 +00:00
|
|
|
while True:
|
|
|
|
selection = int(input())
|
|
|
|
match selection:
|
|
|
|
case 1:
|
2023-04-18 13:03:00 +00:00
|
|
|
create_model()
|
|
|
|
pass
|
2023-03-26 13:22:02 +00:00
|
|
|
case 2:
|
2023-04-18 13:03:00 +00:00
|
|
|
generate_song()
|
|
|
|
pass
|
2023-03-26 13:22:02 +00:00
|
|
|
case 3:
|
2023-04-18 13:03:00 +00:00
|
|
|
scraping()
|
2023-03-26 13:22:02 +00:00
|
|
|
case 4:
|
2023-04-18 13:03:00 +00:00
|
|
|
merging()
|
|
|
|
case 5:
|
2023-03-26 13:22:02 +00:00
|
|
|
break
|
|
|
|
print("\nCommand executed")
|
|
|
|
|
|
|
|
|
|
|
|
# Start the interactive menu only when the file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
|