From a87304e1387fe3a90cfc2950f54563fee726c7f1 Mon Sep 17 00:00:00 2001
From: Sebastian Kutny
Date: Tue, 28 Mar 2023 15:30:52 +0200
Subject: [PATCH] Fix minor bugs.

Build file paths with os.path.join instead of string concatenation,
write scraped CSV files (now with a .csv extension) into a Data
directory, temporarily disable proxy rotation in the scrapper, and
add one more artist link.
---
 links.txt   |  3 ++-
 main.py     | 24 +++++++++++-------------
 scrapper.py | 22 +++++++++++++---------
 3 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/links.txt b/links.txt
index f2e7e27..dafe680 100644
--- a/links.txt
+++ b/links.txt
@@ -1,4 +1,5 @@
 https://www.azlyrics.com/p/pinkfloyd.html
 https://www.azlyrics.com/b/blacksabbath.html
 https://www.tekstowo.pl/piosenki_artysty,paktofonika.html
-https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
\ No newline at end of file
+https://www.tekstowo.pl/piosenki_artysty,bracia_figo_fagot.html
+https://www.tekstowo.pl/piosenki_artysty,kuki.html
\ No newline at end of file
diff --git a/main.py b/main.py
index aaf8529..e93570b 100644
--- a/main.py
+++ b/main.py
@@ -1,22 +1,20 @@
 import os
 import random
-
 import pandas as pd
-
 from scrapper import scrap_data
 from markov_model import clean_data
 from markov_model import create_markov_model
 from markov_model import generate_lyrics
 
 blacksabbath_selected_albums = ["Black Sabbath", "Paranoid", "Master Of Reality", "Vol 4", "Sabbath Bloody Sabbath",
-                                 "Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
-                                 "Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
-                                 "Dehumanizer", "Cross Purposes", "Forbidden", "13"]
+                                "Sabotage", "Technical Ecstasy", "Never Say Die!", "Heaven And Hell", "Mob Rules",
+                                "Born Again", "Seventh Star", "The Eternal Idol", "Headless Cross", "Tyr",
+                                "Dehumanizer", "Cross Purposes", "Forbidden", "13"]
 
 pinkfloyd_selected_albums = ["The Piper At The Gates Of Dawn", "A Saucerful Of Secrets", "Meddle", "More", "Ummagumma",
-                              "Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
-                              "Wish You Were Here", "Animals", "The Wall", "The Final Cut",
-                              "A Momentary Lapse Of Reason", "The Division Bell"]
+                             "Atom Heart Mother", "Obscured By Clouds", "The Dark Side Of The Moon",
+                             "Wish You Were Here", "Animals", "The Wall", "The Final Cut",
+                             "A Momentary Lapse Of Reason", "The Division Bell"]
 
 time_stamp = 3.5
 path = os.path.dirname(os.path.abspath(__file__))
@@ -65,20 +63,20 @@ def scraping():
 
 def merging():
     name1 = input("Select first band file: ")
-    if os.path.exists(path + name1):
-        df1 = pd.read_csv(path + name1)
+    if os.path.exists(os.path.join(path, name1)):
+        df1 = pd.read_csv(os.path.join(path, name1))
     else:
         print("No such file in directory!")
         return
     name2 = input("Select second band file: ")
-    if os.path.exists(path + name2):
-        df2 = pd.read_csv(path + name2)
+    if os.path.exists(os.path.join(path, name2)):
+        df2 = pd.read_csv(os.path.join(path, name2))
     else:
         print("No such file in directory!")
         return
     dfResult = pd.concat([df1, df2], ignore_index=True)
     result_name = input("Select name of result file: ")
-    dfResult.to_csv(path + result_name)
+    dfResult.to_csv(os.path.join(path, result_name))
 
 
 def main():
diff --git a/scrapper.py b/scrapper.py
index b103280..e3d21ea 100644
--- a/scrapper.py
+++ b/scrapper.py
@@ -6,7 +6,6 @@ import os
 import time
 from ScrapThread import ScrapThread
 from proxy_handling import proxies_validation
-from main import path
 
 
 def connect(url, proxies_list):
@@ -16,8 +15,8 @@ def connect(url, proxies_list):
     main_page = None
     while True:
         try:
-            main_page = requests.get(url, headers=headers, proxies={'http': random.choice(proxies_list),
-                                                                    'https': random.choice(proxies_list)}, timeout=5.0)
+            main_page = requests.get(url, headers=headers)  # , proxies={'http': random.choice(proxies_list),
+            # 'https': random.choice(proxies_list)}, timeout=5.0)
             break
         except:
             continue
@@ -138,12 +137,17 @@ def do_threading(url, selected_albums, time_stamp, proxies_list):
 
 
 def scrap_data(url, selected_albums, time_stamp):
-    proxies_list = proxies_validation()
+    # proxies_list = proxies_validation()
+    proxies_list = []
     df = do_threading(url, selected_albums, time_stamp, proxies_list)
+    path = os.path.dirname(os.path.abspath(__file__))
+    path = os.path.join(path, "Data")
     if url.split('/')[2] == 'www.azlyrics.com':
-        filename = url.split('/')[4][:-5]
-        df.to_csv((path + filename))
+        filename = url.split('/')[4][:-5] + '.csv'
+        saving = os.path.join(path, filename)
+        df.to_csv(saving)
     if url.split('/')[2] == 'www.tekstowo.pl':
-        filename = url.split(',')[1][:-5]
-        df.to_csv((path + filename))
-        os.remove("valid_proxy_list")
+        filename = url.split(',')[1][:-5] + '.csv'
+        saving = os.path.join(path, filename)
+        df.to_csv(saving)
+    # os.remove("valid_proxy_list")
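
Note: the reworked scrap_data() now writes its CSV output into a Data
subdirectory next to the script, but nothing in this patch creates that
directory, so df.to_csv(saving) will raise FileNotFoundError on a fresh
checkout. A minimal sketch of the same saving step with a directory guard
added; save_csv is a hypothetical helper for illustration, not part of
this patch:

import os

import pandas as pd


def save_csv(df: pd.DataFrame, filename: str) -> str:
    # Write df to <script dir>/Data/<filename>, creating Data/ if missing.
    # Hypothetical helper; mirrors the saving logic added to scrap_data().
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Data")
    os.makedirs(path, exist_ok=True)  # guard the patch omits
    saving = os.path.join(path, filename)
    df.to_csv(saving)
    return saving

Since os.makedirs(..., exist_ok=True) is a no-op when the directory already
exists, the guard costs nothing on subsequent runs.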