From 75810af4bb2412f3cbf60a859b9dab17ab9e4380 Mon Sep 17 00:00:00 2001
From: "Zeit-Altpeter, Lukas" <lukas.zeit-altpeter@uni-hamburg.de>
Date: Tue, 10 May 2022 13:50:32 +0200
Subject: [PATCH] initial commit

---
 main.py          | 42 ++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  9 +++++++++
 sample_list.txt  |  3 +++
 3 files changed, 54 insertions(+)
 create mode 100755 main.py
 create mode 100644 requirements.txt
 create mode 100644 sample_list.txt

diff --git a/main.py b/main.py
new file mode 100755
index 0000000..9037657
--- /dev/null
+++ b/main.py
@@ -0,0 +1,42 @@
+#! python
+"""Fetch the <title> of every URL listed in a text file.
+
+Usage: main.py URL_LIST -- one URL per line; a missing scheme defaults to
+http://. Titles go to output.txt, one per input line and in input order.
+"""
+import sys
+
+import requests
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+output_filename = "output.txt"
+request_timeout = 10  # seconds; requests.get() waits forever without one
+fallback_title = "--- Titel konnte nicht gescraped werden. ---"
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print("Bitte Liste der URLs als Datei übergeben.")
+        sys.exit(1)  # quit() is only injected by the site module
+    with open(sys.argv[1], "r") as f:
+        urls = f.read().splitlines()
+    if not urls:
+        print("Keine URLs in Datei gefunden oder Datei nicht lesbar.")
+        sys.exit(1)
+
+    titles = []
+    for url in tqdm(urls):
+        if not url.startswith(("http://", "https://")):
+            url = "http://" + url
+        try:
+            res = requests.get(url, timeout=request_timeout)
+            tag = BeautifulSoup(res.text, 'html.parser').find("title")
+            # Tag.string is None for empty/nested titles, which crashed the
+            # final join; get_text() + whitespace collapse gives one line.
+            title = " ".join(tag.get_text().split()) or fallback_title
+        except Exception:  # best effort per URL, but Ctrl-C still aborts
+            title = fallback_title
+        titles.append(title)
+    # Explicit UTF-8: titles may not fit the platform's default encoding.
+    with open(output_filename, "w", encoding="utf-8") as f:
+        f.write("\n".join(titles))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3156a8b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+beautifulsoup4==4.11.1
+bs4==0.0.1
+certifi==2021.10.8
+charset-normalizer==2.0.12
+idna==3.3
+requests==2.27.1
+soupsieve==2.3.2.post1
+tqdm==4.64.0
+urllib3==1.26.9
diff --git a/sample_list.txt b/sample_list.txt
new file mode 100644
index 0000000..c7f40ba
--- /dev/null
+++ b/sample_list.txt
@@ -0,0 +1,3 @@
+uni-hamburg.de
+uni-jena.de
+diese-seite-existiert-nicht.com
-- 
GitLab