# songs-lyrics-generator/main.py

import os
import random
import pandas as pd
from scrapper import scrap_data
from markov_model import clean_data, create_markov_model, generate_lyrics, self_BLEU, zipfs_law, plot_heaps_laws, cross_entropy, perplexity
import json

# Per-band album lists. scraping() looks these up by name through globals(),
# building the variable name as "<last URL path segment minus 5 chars>_selected_albums",
# and hands the list to scrap_data() as its second argument.
blacksabbath_selected_albums = [
    "Black Sabbath",
    "Paranoid",
    "Master Of Reality",
    "Vol 4",
    "Sabbath Bloody Sabbath",
    "Sabotage",
    "Technical Ecstasy",
    "Never Say Die!",
    "Heaven And Hell",
    "Mob Rules",
    "Born Again",
    "Seventh Star",
    "The Eternal Idol",
    "Headless Cross",
    "Tyr",
    "Dehumanizer",
    "Cross Purposes",
    "Forbidden",
    "13",
]

pinkfloyd_selected_albums = [
    "The Piper At The Gates Of Dawn",
    "A Saucerful Of Secrets",
    "Meddle",
    "More",
    "Ummagumma",
    "Atom Heart Mother",
    "Obscured By Clouds",
    "The Dark Side Of The Moon",
    "Wish You Were Here",
    "Animals",
    "The Wall",
    "The Final Cut",
    "A Momentary Lapse Of Reason",
    "The Division Bell",
]

# Third argument given to scrap_data() for azlyrics.com URLs (tekstowo.pl gets 0.0).
# NOTE(review): presumably a delay in seconds between requests -- confirm in scrapper.
time_stamp = 3.5

# Directory containing this script; the data and model folders live next to it.
path = os.path.split(os.path.abspath(__file__))[0]
pathData = os.path.join(path, "Data")      # CSV datasets read by create_model()/merging()
pathModels = os.path.join(path, "Models")  # JSON models written by create_model()


def print_file_list(filepath):
    """Print a numbered menu of the files in ``filepath`` and return their names.

    Entries that are not regular files (for example sub-directories) are left
    out. Numbering starts at 0 and follows the order produced by
    ``os.listdir``, so each printed number is a valid index into the returned
    list.
    """
    filelist = [
        entry
        for entry in os.listdir(filepath)
        if os.path.isfile(os.path.join(filepath, entry))
    ]
    for index, entry in enumerate(filelist):
        print(index, ": ", entry)
    return filelist


def create_model():
    """Interactively build a Markov model from a data file and save it as JSON.

    Prompts for a file from the data directory, for the number of words per
    Markov state, and for a model name. The cleaned dataset is passed to
    ``create_markov_model`` and the result is written to
    ``<pathModels>/<model name>.json``.

    Raises:
        ValueError: if a numeric prompt receives non-integer input.
        IndexError: if the selected file number is out of range.
    """
    filelist = print_file_list(pathData)
    name = filelist[int(input("Select datafile: "))]
    dataset = clean_data(os.path.join(pathData, name))
    n_gram = int(input("Select number of words in Markov state: "))
    model = create_markov_model(dataset, n_gram)
    model_name = input("Select model name: ")
    # Make sure the target directory exists; otherwise open() fails on a fresh
    # checkout and the model that was just built is lost.
    os.makedirs(pathModels, exist_ok=True)
    with open(os.path.join(pathModels, model_name) + '.json', 'w') as model_file:
        json.dump(model, model_file)


def generate_song():
    """Interactively generate and print a song from a saved model.

    Prompts for a model file from the models directory, the number of verses,
    and the number of words per verse. Each verse starts from a randomly
    chosen model key and is printed as it is produced.

    Returns:
        list[str]: every whitespace-separated token of all generated verses,
        lower-cased, in generation order.
    """
    available_models = print_file_list(pathModels)
    chosen_model = available_models[int(input("Select model: "))]
    with open(os.path.join(pathModels, chosen_model), 'r') as handle:
        model = json.load(handle)

    number_of_verses = int(input("Select number of verses: "))
    requested_words = int(input("Select number of words in verses: "))
    # The starting state already contributes its own words, so subtract the
    # word count of a model key (measured on the first key) from the request.
    state_size = len(list(model)[0].split(' '))
    words_in_verses = requested_words - state_size

    print('\n')
    rime = None
    song = []
    for verse_index in range(number_of_verses):
        start_state = random.choice(list(model))
        # True for every second verse (indices 1, 3, ...).
        # NOTE(review): presumably asks generate_lyrics to rhyme with `rime` -- confirm.
        odd_verse = verse_index % 2 == 1
        generated_lyrics, rime = generate_lyrics(model, start_state, words_in_verses, odd_verse, rime)
        print(generated_lyrics)
        song.extend(token.lower() for token in generated_lyrics.split())
    return song


def scraping():
    """Let the user pick a URL from ``links.txt`` and scrape it.

    ``links.txt`` is read from the current working directory, one URL per
    line. Blank lines are ignored, and every entry is stripped of surrounding
    whitespace, so a trailing newline at the end of the file cannot leak into
    the last URL.

    * ``www.azlyrics.com`` URLs need a module-level list named
      ``<band>_selected_albums``, where ``<band>`` is the fifth ``/``-separated
      URL part without its last five characters (the ``.html`` extension of a
      band page). The list and ``time_stamp`` are passed to ``scrap_data``.
    * ``www.tekstowo.pl`` URLs are scraped with an empty album list and ``0.0``.
    * Any other host is silently ignored.

    Raises:
        ValueError: if the selection is not an integer.
        IndexError: if the selection is out of range.
    """
    with open("links.txt", "r") as f:
        stripped = [line.strip() for line in f]
    lines = [line for line in stripped if line]

    for i, link in enumerate(lines):
        print(str(i) + ".", link)

    line_index = int(input("Select url to scrap: "))
    url = lines[line_index]

    parts = url.split('/')
    # "scheme://host/..." splits into ['scheme:', '', 'host', ...]
    host = parts[2] if len(parts) > 2 else ''

    if host == 'www.azlyrics.com':
        selected_albums_name = parts[4][:-5] + "_selected_albums"
        if selected_albums_name in globals():
            selected_albums = globals()[selected_albums_name]
            scrap_data(url, selected_albums, time_stamp)
        else:
            print("Define selected albums in global list variable in format: bandname_selected_albums")
    elif host == 'www.tekstowo.pl':
        scrap_data(url, [], 0.0)


def merging():
    """Interactively concatenate CSV files from the data directory into one file.

    The files still available are listed repeatedly, followed by a final
    "That's all" entry. Each chosen file is read with ``pd.read_csv``, appended
    to the running result and removed from the menu. Choosing the final entry
    ends the selection. The result is written with ``DataFrame.to_csv``
    (default options) into the data directory under the name the user types,
    which is used verbatim.
    """
    df = pd.DataFrame(columns=['Title', 'Lyrics'])
    print("Select files to merge: ")
    filelist = [
        entry
        for entry in os.listdir(pathData)
        if os.path.isfile(os.path.join(pathData, entry))
    ]

    while True:
        for position, entry in enumerate(filelist):
            print(position, ": ", entry)
        # The "finish" option is always numbered one past the last file.
        finish_option = len(filelist)
        print(finish_option, ": That's all")

        option = int(input("Select option: "))
        if option == finish_option:
            break

        df1 = pd.read_csv(os.path.join(pathData, filelist[option]))
        df = pd.concat([df, df1], ignore_index=True)
        # A file can be merged only once.
        del filelist[option]

    result_name = input("Select name of result file: ")
    df.to_csv(os.path.join(pathData, result_name))


def main():
    """Run the interactive top-level menu until the user selects "Exit".

    The menu text is printed once. Each subsequent line of input is read as an
    option number:

    1. create_model()   2. generate_song()   3. scraping()   4. merging()   5. exit

    Input that is not an integer, or an integer outside 1-5, is reported and
    the loop keeps waiting instead of crashing or claiming a command ran.
    """
    print("Select option:\n1. Create model based on datafile\n2. Generate lyrics with model\n3. Scrap "
          "data\n4. Merge CSV band's songs\n5. Exit")
    actions = {
        1: create_model,
        2: generate_song,
        3: scraping,
        4: merging,
    }
    while True:
        try:
            selection = int(input())
        except ValueError:
            print("Invalid input, enter a number from 1 to 5")
            continue
        if selection == 5:
            break
        action = actions.get(selection)
        if action is None:
            print("Unknown option, enter a number from 1 to 5")
            continue
        action()
        print("\nCommand executed")


# Start the interactive menu only when this file is executed as a script.
if __name__ == "__main__":
    main()
Initial commit 2023-03-26 13:22:02 +00:00			`import os`
			`import random`
Interface improvements. 2023-03-28 13:08:23 +00:00			`import pandas as pd`
Initial commit 2023-03-26 13:22:02 +00:00			`from scrapper import scrap_data`
Added statistical analysis based on Cross-Entropy and Perplexity. 2023-04-23 14:17:58 +00:00			`from markov_model import clean_data, create_markov_model, generate_lyrics, self_BLEU, zipfs_law, plot_heaps_laws, cross_entropy, perplexity`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`import json`
Initial commit 2023-03-26 13:22:02 +00:00
Interface improvements. 2023-03-28 13:08:23 +00:00			`blacksabbath_selected_albums = ["Black Sabbath", "Paranoid", "Master Of Reality", "Vol 4", "Sabbath Bloody Sabbath",`
Minor bugs repair. 2023-03-28 13:30:52 +00:00			`"Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",`
			`"Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",`
			`"Dehumanizer", "Cross Purposes", "Forbidden", "13"]`
Initial commit 2023-03-26 13:22:02 +00:00
Interface improvements. 2023-03-28 13:08:23 +00:00			`pinkfloyd_selected_albums = ["The Piper At The Gates Of Dawn", "A Saucerful Of Secrets", "Meddle", "More", "Ummagumma",`
Minor bugs repair. 2023-03-28 13:30:52 +00:00			`"Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",`
			`"Wish You Were Here", "Animals", "The Wall", "The Final Cut",`
			`"A Momentary Lapse Of Reason", "The Division Bell"]`
Initial commit 2023-03-26 13:22:02 +00:00
			`time_stamp = 3.5`
Interface change and minor improvement in data cleaning. 2023-03-27 22:21:13 +00:00			`path = os.path.dirname(os.path.abspath(__file__))`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`pathData = os.path.join(path, "Data")`
			`pathModels = os.path.join(path, "Models")`
Interface change and minor improvement in data cleaning. 2023-03-27 22:21:13 +00:00

Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`def print_file_list(filepath):`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`filelist = []`
Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`for file in os.listdir(filepath):`
			`if os.path.isfile(os.path.join(filepath, file)):`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`filelist.append(file)`
			`i = 0`
			`for file in filelist:`
			`print(i, ": ", file)`
			`i += 1`
Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`return filelist`


			`def create_model():`
			`filelist = print_file_list(pathData)`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`name = filelist[int(input("Select datafile: "))]`
			`dataset = clean_data(os.path.join(pathData, name))`
Interface change and minor improvement in data cleaning. 2023-03-27 22:21:13 +00:00			`n_gram = int(input("Select number of words in Markov state: "))`
Added rhymes in lyrics generation. 2023-03-28 15:25:17 +00:00			`model = create_markov_model(dataset, n_gram)`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`model_name = input("Select model name: ")`
			`with open(os.path.join(pathModels, model_name) + '.json', 'w') as model_file:`
			`model_file.write(json.dumps(model))`


			`def generate_song():`
Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`filelist = print_file_list(pathModels)`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`model_name = filelist[int(input("Select model: "))]`
			`with open(os.path.join(pathModels, model_name), 'r') as model_file:`
			`model = json.loads(model_file.read())`
			`number_of_verses = int(input("Select number of verses: "))`
			`words_in_verses = int(input("Select number of words in verses: ")) - len(list(model.keys())[0].split(' '))`
Interface change and minor improvement in data cleaning. 2023-03-27 22:21:13 +00:00			`print('\n')`
Added rhymes in lyrics generation. 2023-03-28 15:25:17 +00:00			`rime = None`
Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`song = []`
Interface change and minor improvement in data cleaning. 2023-03-27 22:21:13 +00:00			`for i in range(number_of_verses):`
Major updates in model + gathered some data. 2023-04-04 16:01:11 +00:00			`generated_lyrics, rime = generate_lyrics(model, random.choice(list(model.keys())), words_in_verses, True if i % 2 == 1 else False, rime)`
Interface change and minor improvement in data cleaning. 2023-03-27 22:21:13 +00:00			`print(generated_lyrics)`
Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`for state in generated_lyrics.split():`
			`song.append(state.lower())`
			`return song`
Interface improvements. 2023-03-28 13:08:23 +00:00

			`def scraping():`
			`with open("links.txt", "r") as f:`
			`lines = f.readlines()`
			`for i in range(len(lines)):`
			`if i != (len(lines) - 1):`
			`print(str(i) + ".", lines[i][:-1])`
			`else:`
			`print(str(i) + ".", lines[i])`
			`line_index = int(input("Select url to scrap: "))`
			`url = lines[line_index]`
			`if line_index != (len(lines) - 1):`
			`url = url[:-1]`
			`if url.split('/')[2] == 'www.azlyrics.com':`
			`selected_albums_name = url.split('/')[4][:-5] + "_selected_albums"`
			`if selected_albums_name in globals():`
			`selected_albums = globals()[selected_albums_name]`
			`scrap_data(url, selected_albums, time_stamp)`
			`else:`
			`print("Define selected albums in global list variable in format: bandname_selected_albums")`
			`return`
			`if url.split('/')[2] == 'www.tekstowo.pl':`
			`scrap_data(url, [], 0.0)`


			`def merging():`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`df = pd.DataFrame(columns=['Title', 'Lyrics'])`
			`print("Select files to merge: ")`
			`filelist = []`
			`for file in os.listdir(pathData):`
			`if os.path.isfile(os.path.join(pathData, file)):`
			`filelist.append(file)`
			`while True:`
			`i = 0`
			`for file in filelist:`
			`print(i, ": ", file)`
			`i += 1`
			`print(i, ": That's all")`
			`option = int(input("Select option: "))`
			`if option == i:`
			`break`
			`else:`
			`df1 = pd.read_csv(os.path.join(pathData, filelist[option]))`
			`df = pd.concat([df, df1], ignore_index=True)`
			`filelist.pop(option)`
Interface improvements. 2023-03-28 13:08:23 +00:00			`result_name = input("Select name of result file: ")`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`df.to_csv(os.path.join(pathData, result_name))`
Initial commit 2023-03-26 13:22:02 +00:00

			`def main():`
Added statistical analysis based on Zipf's law, Heap's law and Self-BLEU evaluation. 2023-04-21 22:12:26 +00:00			`print("Select option:\n1. Create model based on datafile\n2. Generate lyrics with model\n3. Scrap "`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`"data\n4. Merge CSV band's songs\n5. Exit")`
Initial commit 2023-03-26 13:22:02 +00:00			`while True:`
			`selection = int(input())`
			`match selection:`
			`case 1:`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`create_model()`
			`pass`
Initial commit 2023-03-26 13:22:02 +00:00			`case 2:`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`generate_song()`
			`pass`
Initial commit 2023-03-26 13:22:02 +00:00			`case 3:`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`scraping()`
Initial commit 2023-03-26 13:22:02 +00:00			`case 4:`
Added saving and reading model from file plus improvements in dataset handling options. 2023-04-18 13:03:00 +00:00			`merging()`
			`case 5:`
Initial commit 2023-03-26 13:22:02 +00:00			`break`
			`print("\nCommand executed")`


			`if __name__ == '__main__':`
			`main()`