From 0325babd8bae26e3d853429eb8484e80d5809d35 Mon Sep 17 00:00:00 2001 From: "Jochens, Florian" <fj@andaco.de> Date: Thu, 4 Nov 2021 12:37:30 +0100 Subject: [PATCH] implemented get_article_info() and get_download_url() --- input/input_fj.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/input/input_fj.py b/input/input_fj.py index 07c292f..25d06b8 100755 --- a/input/input_fj.py +++ b/input/input_fj.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Functions for information retrievel of articles from the ACS journal +Functions for information retrievel of articles from the ACS journal JCIM """ @@ -16,6 +16,7 @@ __status__ = "Production" from bs4 import BeautifulSoup as bs import requests as req import sys +from pathlib import Path class Publication: #_registry = [] @@ -53,11 +54,27 @@ def get_article_info(): pub = Publication(article_title, publication_date, contributors, doi_url) return pub -"""def get_citation_info(pub) +def get_download_url(): + export = soup.find('div', class_ = 'cit-download-dropdown_content') + url = 'https://pubs.acs.org' + for link in export.find_all('a'): + if link.get('title') == 'Citation and references': + url += link.get('href') + return url + +def download(url): # Download citation and references file + if url.find('='): + filename = url.rsplit('=', 1)[1] + path = Path(('./files/' + filename)) + if path.is_file(): + print("File already exists") + else: + print("File does not exist") +#def get_citation_info(pub) - return pub +# return pub -def input():""" +#def input(): if len(sys.argv) != 2: sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0])) @@ -74,3 +91,6 @@ Contributors:''') print(*(pub.contributors), sep = ", ") print(f''' DOI-URL: {pub.doi_url}''') + +url = get_download_url() +download(url) -- GitLab