Skip to content
Snippets Groups Projects
Commit 75810af4 authored by Zeit-Altpeter, Lukas's avatar Zeit-Altpeter, Lukas
Browse files

initial commit

parent 0bb806bd
No related branches found
No related tags found
No related merge requests found
main.py 0 → 100755
#! python
import sys
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
output_filename = "output.txt"

# Placeholder written in place of a title whenever a page cannot be scraped.
_TITLE_FALLBACK = "--- Titel konnte nicht gescraped werden. ---"

# Seconds to wait for a server; without a timeout requests may block forever.
_REQUEST_TIMEOUT = 10


def _scrape_title(url):
    """Return the ``<title>`` text of the page at *url*, or the placeholder.

    A missing scheme is completed with ``http://``. Any failure while
    fetching or parsing (network error, timeout, no ``<title>`` element,
    empty title) yields the placeholder string instead of raising, so a
    single bad URL never aborts the whole run.

    Whitespace inside the title is collapsed to single spaces so that every
    result occupies exactly one line of the output file.
    """
    url = url.strip()
    if not url:
        # Blank input line: keep its slot in the output, skip the request.
        return _TITLE_FALLBACK
    if not url.startswith(("http://", "https://")):
        url = "http://" + url
    try:
        res = requests.get(url, timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(res.text, 'html.parser')
        tag = soup.find("title")
    except Exception:
        # Deliberately broad (best-effort scraping), but unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        return _TITLE_FALLBACK
    if tag is None:
        return _TITLE_FALLBACK
    # get_text() never returns None, unlike ``.string`` which is None for
    # an empty title or one that contains nested markup.
    title = " ".join(tag.get_text().split())
    return title or _TITLE_FALLBACK


def main():
    """Read URLs from the file given in argv[1] and write their titles.

    The input file holds one URL per line. The output file
    (``output_filename``) receives one title per line, in input order.
    Exits with status 1 when the argument is missing, the file cannot be
    read, or it contains no lines.
    """
    if len(sys.argv) != 2:
        print("Bitte Liste der URLs als Datei übergeben.")
        sys.exit(1)
    path = sys.argv[1]
    try:
        with open(path, "r", encoding="utf-8") as f:
            urls = f.read().splitlines()
    except (OSError, UnicodeError):
        # Report an unreadable file through the message below rather than
        # through a traceback.
        urls = []
    if not urls:
        print("Keine URLs in Datei gefunden oder Datei nicht lesbar.")
        sys.exit(1)
    titles = [_scrape_title(url) for url in tqdm(urls)]
    # Explicit UTF-8 so titles with non-ASCII characters cannot fail on
    # platforms whose default encoding is narrower.
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write("\n".join(titles))


if __name__ == '__main__':
    main()
beautifulsoup4==4.11.1
bs4==0.0.1
certifi==2021.10.8
charset-normalizer==2.0.12
idna==3.3
requests==2.27.1
soupsieve==2.3.2.post1
tqdm==4.64.0
urllib3==1.26.9
uni-hamburg.de
uni-jena.de
diese-seite-existiert-nicht.com
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment