# Skip to content
# Snippets Groups Projects
# download.py 5.35 KiB
# Newer Older
# Schlüschen, Timo's avatar
# Schlüschen, Timo committed

from tkinter import filedialog
from tkinter import *
from tkinter.ttk import *
import re
from pathlib import Path
from bs4 import BeautifulSoup
import requests
from pathlib import Path


class Window(Frame):
    """Tkinter front end that downloads every PDF linked from a web page.

    The user enters a page URL and picks a target directory. Pressing
    "Download Files" fetches the page, collects all ``<a href>`` targets
    ending in ``pdf``, saves each one under a sanitised file name (see
    :meth:`fionalize`) and lists the per-file result in the window.
    """

    # Seconds to wait for a server before giving up. ``requests`` applies no
    # timeout by default, so a dead host would otherwise freeze the GUI.
    REQUEST_TIMEOUT = 30

    # Trailing ".ext" of a file name (letters/digits only, may be empty).
    _EXTENSION_RE = re.compile(r"\.[a-zA-Z\d]*$")
    # Characters that are copied into the sanitised name unchanged.
    _KEPT_CHAR_RE = re.compile(r"[a-z\d-]")
    # Characters that are rewritten rather than dropped. Upper-case umlauts
    # map to the lower-case transliteration because the result is lower-case.
    _REPLACEMENTS = {
        "_": "-",
        "ä": "ae", "Ä": "ae",
        "ö": "oe", "Ö": "oe",
        "ü": "ue", "Ü": "ue",
        "ß": "ss", "ẞ": "ss",
    }

    def __init__(self, master=None):
        """Create the widget tree inside *master* (default root if ``None``)."""
        # Frame.__init__ stores the effective parent in ``self.master`` and
        # falls back to the default root when *master* is None, so it must
        # not be overwritten with the raw argument afterwards.
        super().__init__(master)

        self.download_dir = ""  # empty until the user picks a directory
        self.download_dir_str = StringVar()
        self.download_url_str = StringVar()
        self.download_links = []

        self.init_window()

    def init_window(self):
        """Build the static widgets: URL row, directory row and result area."""
        self.master.title('FIONA PDF downloader')

        self.main_frame = Frame(self.master)
        self.main_frame.pack()

        self.url_lbl = Label(self.main_frame, text="URL: ")
        self.url_lbl.grid(row=0, column=0, sticky="w", padx=10)

        self.url_field = Entry(self.main_frame, textvariable=self.download_url_str)
        self.url_field.grid(row=0, column=1, pady=10, padx=10)

        self.download_btn = Button(self.main_frame, text="Download Files", command=self.download_files)
        self.download_btn.grid(row=0, column=2, pady=10, padx=10)

        self.path_lbl = Label(self.main_frame, text="Download directory: ")
        self.path_lbl.grid(row=1, column=0, pady=10, padx=10)

        self.path_show = Label(self.main_frame, textvariable=self.download_dir_str)
        self.path_show.grid(row=1, column=1, pady=10, padx=10)

        self.path_btn = Button(self.main_frame, text="Set Directory", command=self.set_download_dir)
        self.path_btn.grid(row=1, column=2, pady=10, padx=10)

        # Container for the per-file result table filled by download_files().
        self.link_frame = Frame(self.main_frame)
        self.link_frame.grid(row=2, columnspan=3, padx=10, pady=10)

    def set_download_dir(self):
        """Ask the user for the target directory and remember the choice."""
        chosen = filedialog.askdirectory()
        # A cancelled dialog returns an empty value; keep the previous
        # directory instead of silently switching to the working directory.
        if not chosen:
            return
        self.download_dir = chosen
        self.download_dir_str.set(chosen)

    def clear_view(self):
        """Remove all rows of a previous download run from the result area."""
        try:
            for widget in self.link_frame.winfo_children():
                widget.destroy()
        except AttributeError:
            # link_frame does not exist yet; nothing to clear.
            pass

    def download_files(self):
        """Download all PDFs linked from the entered URL and list the results.

        Does nothing when no URL has been entered or no download directory
        has been chosen. Errors while fetching the page or an individual
        file are printed; a failed file is shown as "No (Error)".
        """
        # Local import keeps this change self-contained within the class.
        from urllib.parse import unquote, urljoin

        self.clear_view()

        # The StringVar object itself is always truthy, so test its content.
        page_url = self.download_url_str.get().strip()
        download_dir = getattr(self, "download_dir", "")
        if not page_url or not download_dir:
            return

        self.download_links = []
        print(page_url)
        try:
            response = requests.get(page_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as e:
            print(e)
            return

        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(response.text, "html.parser")

        Label(self.link_frame, text="PDF name").grid(row=0, column=0, padx=10, pady=10)
        Label(self.link_frame, text="Downloaded?").grid(row=0, column=1, padx=10, pady=10)

        row = 1
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            print(href)
            if not isinstance(href, str) or not href.endswith('pdf'):
                continue

            # Resolve relative links against the page they were found on.
            pdf_url = urljoin(page_url, href)
            self.download_links.append(pdf_url)

            # Decode %xx escapes first so e.g. "%C3%BC" is sanitised as "ü".
            file_name = self.fionalize(unquote(pdf_url.rsplit('/', 1)[-1]))
            Label(self.link_frame, text=file_name).grid(row=row, column=0, padx=10, pady=5)

            saved = self._save_pdf(pdf_url, Path(download_dir, file_name))
            status_text = "Yes" if saved else "No (Error)"
            Label(self.link_frame, text=status_text).grid(row=row, column=1, padx=10, pady=5)

            row += 1
            # Let Tk redraw so each result appears as soon as it is known.
            self.master.update_idletasks()

    def _save_pdf(self, url, target):
        """Fetch *url* and write it to *target*; return True on success."""
        try:
            pdf_response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            # Without this an HTTP error page would be stored as the "PDF".
            pdf_response.raise_for_status()
            with open(target, 'wb') as out_file:
                out_file.write(pdf_response.content)
        except (requests.RequestException, OSError) as e:
            print(e)
            return False
        return True

    def fionalize(self, string):
        """Return *string* converted to a lower-case, hyphenated file name.

        A trailing ``.ext`` (letters/digits) is kept unchanged. In the rest
        of the name ASCII letters are lower-cased, digits and ``-`` are
        kept, ``_`` becomes ``-``, German umlauts and ``ß`` are
        transliterated (``ä`` -> ``ae``, ``ß`` -> ``ss``, ...) and every
        other character, including whitespace and further dots, is dropped.
        """
        ending = ""
        stem = string

        match = self._EXTENSION_RE.search(string)
        if match is not None:
            ending = match.group(0)
            stem = string[:match.start()] + string[match.end():]

        parts = []
        for char in stem:
            if "A" <= char <= "Z":
                parts.append(char.lower())
            elif self._KEPT_CHAR_RE.match(char):
                parts.append(char)
            elif char in self._REPLACEMENTS:
                parts.append(self._REPLACEMENTS[char])
            # Anything else (whitespace, punctuation, ...) is discarded.

        return "".join(parts) + ending

# Script entry: create the top-level Tk window (800x600, which is also the
# smallest size it may be resized to), mount the downloader UI in it and
# run Tk's event loop until the window is closed.
root = Tk()
root.minsize(width=800, height=600)
root.geometry(newGeometry="800x600")
app = Window(master=root)

root.mainloop()