Minor bug fixes.

Sebastian Kutny 2023-03-28 15:30:52 +02:00
parent afeb9d579b
commit a87304e138
3 changed files with 26 additions and 23 deletions


@@ -1,4 +1,5 @@
 https://www.azlyrics.com/p/pinkfloyd.html
 https://www.azlyrics.com/b/blacksabbath.html
 https://www.tekstowo.pl/piosenki_artysty,paktofonika.html
-https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
+https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
+https://www.tekstowo.pl/piosenki_artysty,kuki.html

main.py (24 changed lines)

@@ -1,22 +1,20 @@
 import os
 import random
 import pandas as pd
 from scrapper import scrap_data
 from markov_model import clean_data
 from markov_model import create_markov_model
 from markov_model import generate_lyrics
 blacksabbath_selected_albums = ["Black Sabbath", "Paranoid", "Master Of Reality", "Vol 4", "Sabbath Bloody Sabbath",
-                                "Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
-                                "Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
-                                "Dehumanizer", "Cross Purposes", "Forbidden", "13"]
+                                "Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
+                                "Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
+                                "Dehumanizer", "Cross Purposes", "Forbidden", "13"]
 pinkfloyd_selected_albums = ["The Piper At The Gates Of Dawn", "A Saucerful Of Secrets", "Meddle", "More", "Ummagumma",
-                             "Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
-                             "Wish You Were Here", "Animals", "The Wall", "The Final Cut",
-                             "A Momentary Lapse Of Reason", "The Division Bell"]
+                             "Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
+                             "Wish You Were Here", "Animals", "The Wall", "The Final Cut",
+                             "A Momentary Lapse Of Reason", "The Division Bell"]
 time_stamp = 3.5
 path = os.path.dirname(os.path.abspath(__file__))
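For orientation, a minimal usage sketch of how these module-level lists are presumably consumed; the actual call sites in main.py are outside this hunk, so the URL and call shown here are assumptions based on scrap_data's signature in scrapper.py:

# Assumed usage, relying on the definitions above; not part of this diff.
url = "https://www.azlyrics.com/b/blacksabbath.html"       # from the tracked link list
scrap_data(url, blacksabbath_selected_albums, time_stamp)  # should write Data/blacksabbath.csv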
@@ -65,20 +63,20 @@ def scraping():

 def merging():
     name1 = input("Select first band file: ")
-    if os.path.exists(path + name1):
-        df1 = pd.read_csv(path + name1)
+    if os.path.exists(os.path.join(path, name1)):
+        df1 = pd.read_csv(os.path.join(path, name1))
     else:
         print("No such file in directory!")
         return
     name2 = input("Select second band file: ")
-    if os.path.exists(path + name2):
-        df2 = pd.read_csv(path + name2)
+    if os.path.exists(os.path.join(path, name2)):
+        df2 = pd.read_csv(os.path.join(path, name2))
     else:
         print("No such file in directory!")
         return
     dfResult = pd.concat([df1, df2], ignore_index=True)
     result_name = input("Select name of result file: ")
-    dfResult.to_csv(path + result_name)
+    dfResult.to_csv(os.path.join(path, result_name))


 def main():

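The recurring fix in this file: path comes from os.path.dirname(...), which has no trailing separator, so path + name silently glued the filename onto the directory name instead of placing the file inside it. A minimal sketch of the difference, with hypothetical values:

import os

path = "/home/user/project"        # os.path.dirname(...) has no trailing slash
name = "pinkfloyd.csv"
print(path + name)                 # '/home/user/projectpinkfloyd.csv' -- wrong path
print(os.path.join(path, name))    # '/home/user/project/pinkfloyd.csv' -- intended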
scrapper.py

@@ -6,7 +6,6 @@ import os
 import time
 from ScrapThread import ScrapThread
 from proxy_handling import proxies_validation
-from main import path


 def connect(url, proxies_list):
@@ -16,8 +15,8 @@ def connect(url, proxies_list):
     main_page = None
     while True:
         try:
-            main_page = requests.get(url, headers=headers, proxies={'http': random.choice(proxies_list),
-                                                                    'https': random.choice(proxies_list)}, timeout=5.0)
+            main_page = requests.get(url, headers=headers)  # , proxies={'http': random.choice(proxies_list),
+            #                                                  'https': random.choice(proxies_list)}, timeout=5.0)
             break
         except:
             continue
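Commenting out the proxies also drops the timeout, and the bare except already swallowed everything, including KeyboardInterrupt, so a dead URL now retries forever. A possible tightening, sketched as a suggestion rather than part of this commit (the retries parameter is hypothetical):

import requests

def connect(url, headers, retries=5):
    # Keep a timeout even without proxies, catch only network errors so
    # Ctrl+C still works, and give up after a bounded number of attempts.
    for _ in range(retries):
        try:
            return requests.get(url, headers=headers, timeout=5.0)
        except requests.RequestException:
            continue
    return None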
@@ -138,12 +137,17 @@ def do_threading(url, selected_albums, time_stamp, proxies_list):


 def scrap_data(url, selected_albums, time_stamp):
-    proxies_list = proxies_validation()
+    # proxies_list = proxies_validation()
+    proxies_list = []
     df = do_threading(url, selected_albums, time_stamp, proxies_list)
+    path = os.path.dirname(os.path.abspath(__file__))
+    path = os.path.join(path, "Data")
     if url.split('/')[2] == 'www.azlyrics.com':
-        filename = url.split('/')[4][:-5]
-        df.to_csv((path + filename))
+        filename = url.split('/')[4][:-5] + '.csv'
+        saving = os.path.join(path, filename)
+        df.to_csv(saving)
     if url.split('/')[2] == 'www.tekstowo.pl':
-        filename = url.split(',')[1][:-5]
-        df.to_csv((path + filename))
-    os.remove("valid_proxy_list")
+        filename = url.split(',')[1][:-5] + '.csv'
+        saving = os.path.join(path, filename)
+        df.to_csv(saving)
+    # os.remove("valid_proxy_list")
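With the '.csv' suffix and os.path.join in place, output now lands inside Data/ with a real extension instead of being appended to the directory name. A quick check of the filename derivation, using URLs from the tracked link list:

url1 = "https://www.azlyrics.com/p/pinkfloyd.html"
url2 = "https://www.tekstowo.pl/piosenki_artysty,kuki.html"

print(url1.split('/')[2])                # 'www.azlyrics.com' -- site dispatch key
print(url1.split('/')[4][:-5] + '.csv')  # 'pinkfloyd.csv' ([:-5] strips '.html')
print(url2.split(',')[1][:-5] + '.csv')  # 'kuki.csv'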