#!/usr/bin/env python3
"""
Functions for information retrieval of articles from the ACS journal

Run as a script with a single article URL to print the article's title,
publication date, contributors and DOI URL.
"""

__author__ = "Florian Jochens"
__email__ = "fj@andaco.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

from bs4 import BeautifulSoup as bs
import requests as req
import sys

# Seconds to wait for the server before giving up; without a timeout
# ``requests`` may block indefinitely.
REQUEST_TIMEOUT = 30


class Publication:
    """Metadata of a single article.

    Attributes are stored exactly as passed to the constructor:
    ``title``, ``publication_date``, ``contributors`` and ``doi_url``.
    """

    #_registry = []
    # Class-level default kept so ``Publication._citations`` still resolves;
    # ``__init__`` gives every instance its own list so that citations are
    # not accidentally shared between publications.
    _citations = []

    def __init__(self, title, publication_date, contributors, doi_url):
        #self._registry.append(self)
        self.title = title
        self.publication_date = publication_date
        self.contributors = contributors
        self.doi_url = doi_url
        self._citations = []


class Citations:
    """Metadata of a single cited work (title, journal, contributors, DOI URL)."""

    def __init__(self, title, journal, contributors, doi_url):
        self.title = title
        self.journal = journal
        self.contributors = contributors
        self.doi_url = doi_url


def _find_required(parent, name, css_class):
    """Return the first ``name`` element with ``css_class`` below ``parent``.

    Raises:
        ValueError: if no such element exists.
    """
    node = parent.find(name, class_=css_class)
    if node is None:
        raise ValueError(
            f'element <{name} class="{css_class}"> not found in document')
    return node


def get_article_info(soup=None):
    """Extract the article metadata from a parsed article page.

    Args:
        soup: parsed HTML document (a ``BeautifulSoup`` object). When
            omitted, a module-level variable named ``soup`` is used if one
            exists, which keeps the former zero-argument call working.

    Returns:
        A ``Publication`` holding title, publication date, the list of
        contributor names and the DOI URL.

    Raises:
        ValueError: if no document is available or an expected element is
            missing from it.
    """
    if soup is None:
        soup = globals().get('soup')
        if soup is None:
            raise ValueError('no parsed document given')

    header = _find_required(soup, 'div', 'article_header-left pull-left')
    article_title = _find_required(header, 'span', 'hlFld-Title').text
    publication_date = _find_required(header, 'span', 'pub-date-value').text

    # Look up the anchor explicitly instead of iterating over all children
    # of the div: children may be plain text nodes (no ``.get``), and an
    # empty div would leave the DOI variable unassigned.
    doi_box = _find_required(header, 'div', 'article_header-doiurl')
    doi_link = doi_box.find('a', href=True)
    if doi_link is None:
        raise ValueError('no DOI link found in article header')
    doi_url = doi_link['href']

    #subs = header.find('div', class_ = 'article_header-taxonomy')
    #subjects = []
    #for sub in subs.find_all('a'):
    #    subjects.append(sub.get('title'))

    author_list = _find_required(header, 'ul', 'loa')
    contributors = [
        author.text
        for author in author_list.find_all('span', class_='hlFld-ContribAuthor')
    ]

    return Publication(article_title, publication_date, contributors, doi_url)


# TODO: planned but not yet implemented:
#   def get_citation_info(pub): ... return pub
#   def input(): ...


def main(argv=None):
    """Fetch the article at the URL given on the command line and print it.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 on usage or retrieval errors.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        program = argv[0] if argv else 'input_fj.py'
        sys.stderr.write('Usage: {} <url>\n'.format(program))
        return 1
    url = argv[1]

    try:
        response = req.get(url, timeout=REQUEST_TIMEOUT)
        # Fail on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        pub = get_article_info(bs(response.text, 'html.parser'))
    except (req.RequestException, ValueError) as err:
        sys.stderr.write('Error: {}\n'.format(err))
        return 1

    print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}

Contributors:''')
    print(*pub.contributors, sep=", ")
    print(f'''
DOI-URL: {pub.doi_url}''')
    return 0


if __name__ == '__main__':
    sys.exit(main())