mirror of
https://github.com/WallyS02/Song-Lyrics-Generator.git
synced 2025-01-18 00:09:19 +00:00
Minor bugs repair.
This commit is contained in:
parent
afeb9d579b
commit
a87304e138
@ -1,4 +1,5 @@
|
||||
https://www.azlyrics.com/p/pinkfloyd.html
|
||||
https://www.azlyrics.com/b/blacksabbath.html
|
||||
https://www.tekstowo.pl/piosenki_artysty,paktofonika.html
|
||||
https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
|
||||
https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
|
||||
https://www.tekstowo.pl/piosenki_artysty,kuki.html
|
24
main.py
24
main.py
@ -1,22 +1,20 @@
|
||||
import os
|
||||
import random
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from scrapper import scrap_data
|
||||
from markov_model import clean_data
|
||||
from markov_model import create_markov_model
|
||||
from markov_model import generate_lyrics
|
||||
|
||||
blacksabbath_selected_albums = ["Black Sabbath", "Paranoid", "Master Of Reality", "Vol 4", "Sabbath Bloody Sabbath",
|
||||
"Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
|
||||
"Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
|
||||
"Dehumanizer", "Cross Purposes", "Forbidden", "13"]
|
||||
"Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
|
||||
"Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
|
||||
"Dehumanizer", "Cross Purposes", "Forbidden", "13"]
|
||||
|
||||
pinkfloyd_selected_albums = ["The Piper At The Gates Of Dawn", "A Saucerful Of Secrets", "Meddle", "More", "Ummagumma",
|
||||
"Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
|
||||
"Wish You Were Here", "Animals", "The Wall", "The Final Cut",
|
||||
"A Momentary Lapse Of Reason", "The Division Bell"]
|
||||
"Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
|
||||
"Wish You Were Here", "Animals", "The Wall", "The Final Cut",
|
||||
"A Momentary Lapse Of Reason", "The Division Bell"]
|
||||
|
||||
time_stamp = 3.5
|
||||
path = os.path.dirname(os.path.abspath(__file__))
|
||||
@ -65,20 +63,20 @@ def scraping():
|
||||
|
||||
def merging():
|
||||
name1 = input("Select first band file: ")
|
||||
if os.path.exists(path + name1):
|
||||
df1 = pd.read_csv(path + name1)
|
||||
if os.path.exists(os.path.join(path, name1)):
|
||||
df1 = pd.read_csv(os.path.join(path, name1))
|
||||
else:
|
||||
print("No such file in directory!")
|
||||
return
|
||||
name2 = input("Select second band file: ")
|
||||
if os.path.exists(path + name2):
|
||||
df2 = pd.read_csv(path + name2)
|
||||
if os.path.exists(os.path.join(path, name2)):
|
||||
df2 = pd.read_csv(os.path.join(path, name2))
|
||||
else:
|
||||
print("No such file in directory!")
|
||||
return
|
||||
dfResult = pd.concat([df1, df2], ignore_index=True)
|
||||
result_name = input("Select name of result file: ")
|
||||
dfResult.to_csv(path + result_name)
|
||||
dfResult.to_csv(os.path.join(path, result_name))
|
||||
|
||||
|
||||
def main():
|
||||
|
22
scrapper.py
22
scrapper.py
@ -6,7 +6,6 @@ import os
|
||||
import time
|
||||
from ScrapThread import ScrapThread
|
||||
from proxy_handling import proxies_validation
|
||||
from main import path
|
||||
|
||||
|
||||
def connect(url, proxies_list):
|
||||
@ -16,8 +15,8 @@ def connect(url, proxies_list):
|
||||
main_page = None
|
||||
while True:
|
||||
try:
|
||||
main_page = requests.get(url, headers=headers, proxies={'http': random.choice(proxies_list),
|
||||
'https': random.choice(proxies_list)}, timeout=5.0)
|
||||
main_page = requests.get(url, headers=headers) #, proxies={'http': random.choice(proxies_list),
|
||||
# 'https': random.choice(proxies_list)}, timeout=5.0)
|
||||
break
|
||||
except:
|
||||
continue
|
||||
@ -138,12 +137,17 @@ def do_threading(url, selected_albums, time_stamp, proxies_list):
|
||||
|
||||
|
||||
def scrap_data(url, selected_albums, time_stamp):
|
||||
proxies_list = proxies_validation()
|
||||
# proxies_list = proxies_validation()
|
||||
proxies_list = []
|
||||
df = do_threading(url, selected_albums, time_stamp, proxies_list)
|
||||
path = os.path.dirname(os.path.abspath(__file__))
|
||||
path = os.path.join(path, "Data")
|
||||
if url.split('/')[2] == 'www.azlyrics.com':
|
||||
filename = url.split('/')[4][:-5]
|
||||
df.to_csv((path + filename))
|
||||
filename = url.split('/')[4][:-5] + '.csv'
|
||||
saving = os.path.join(path, filename)
|
||||
df.to_csv(saving)
|
||||
if url.split('/')[2] == 'www.tekstowo.pl':
|
||||
filename = url.split(',')[1][:-5]
|
||||
df.to_csv((path + filename))
|
||||
os.remove("valid_proxy_list")
|
||||
filename = url.split(',')[1][:-5] + '.csv'
|
||||
saving = os.path.join(path, filename)
|
||||
df.to_csv(saving)
|
||||
# os.remove("valid_proxy_list")
|
||||
|
Loading…
x
Reference in New Issue
Block a user