Skip to content
Snippets Groups Projects
Commit 35b2ab08 authored by Jochens, Florian's avatar Jochens, Florian
Browse files

updated version of input_fj.py

parent 0325babd
No related branches found
No related tags found
1 merge request!1input
class Publication:
    """A scraped journal article: bibliographic metadata plus the works
    that cite it (collected in the per-instance ``_citations`` list)."""

    #_registry = []

    def __init__(self, title, publication_date, contributors, doi_url,
                 subjects, num_citations):
        """Store the article metadata.

        :param title: article title text
        :param publication_date: publication date text from the page
        :param contributors: list of author name strings
        :param doi_url: DOI link of the article
        :param subjects: list of subject/taxonomy strings
        :param num_citations: citing-publication count (string as scraped)
        """
        #self._registry.append(self)
        # Instance-level list: the previous class-level ``_citations = []``
        # was shared by every Publication object, so citations appended for
        # one article leaked into all others.
        self._citations = []
        self.title = title
        self.publication_date = publication_date
        self.contributors = contributors
        self.doi_url = doi_url
        self.subjects = subjects
        self.num_citations = num_citations
class Citation:
    """One work that cites the scraped article: title, journal,
    contributor text and DOI link."""

    def __init__(self, title, journal, contributors, doi_url):
        # Plain record type: just bind the four fields.
        self.title, self.journal = title, journal
        self.contributors, self.doi_url = contributors, doi_url
def get_article_info(soup):
    """Extract article metadata from a parsed article page.

    :param soup: BeautifulSoup of an ACS-style article page
    :return: Publication holding title, date, authors, DOI URL, subjects
        and the citing-publication count
    """
    header = soup.find('div', class_ = 'article_header-left pull-left')
    article_title = header.find('span', class_ = 'hlFld-Title').text
    publication_date = header.find('span', class_ = 'pub-date-value').text
    # The DOI container normally holds one <a>; keep the last href found.
    # Initialized to None so an empty container no longer raises NameError.
    doi_url = None
    for link in header.find('div', class_ = 'article_header-doiurl'):
        doi_url = link.get('href')
    subs = header.find('div', class_ = 'article_header-taxonomy')
    subjects = [sub.get('title') for sub in subs.find_all('a')]
    cons = header.find('ul', class_ = 'loa')
    contributors = [con.text for con in
                    cons.find_all('span', class_ = 'hlFld-ContribAuthor')]
    # Link text of the "cited by" nav entry, e.g. "12" — kept as a string.
    num_citations = header.find('a', class_ = 'internalNav').text
    return Publication(article_title, publication_date, contributors, doi_url,
                       subjects, num_citations)
def get_download_url(): def get_download_url():
...@@ -60,6 +65,7 @@ def get_download_url(): ...@@ -60,6 +65,7 @@ def get_download_url():
for link in export.find_all('a'): for link in export.find_all('a'):
if link.get('title') == 'Citation and references': if link.get('title') == 'Citation and references':
url += link.get('href') url += link.get('href')
print(url)
return url return url
def download(url): # Download citation and references file def download(url): # Download citation and references file
...@@ -70,27 +76,56 @@ def download(url): # Download citation and references file ...@@ -70,27 +76,56 @@ def download(url): # Download citation and references file
print("File already exists") print("File already exists")
else: else:
print("File does not exist") print("File does not exist")
#def get_citation_info(pub)
# return pub
#def input():
if len(sys.argv) != 2:
sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
exit(1)
url = sys.argv[1]
html_text = req.get(url).text
soup = bs(html_text, 'html.parser')
pub = get_article_info()
def get_citation_info(pub, num_citations, soup):
    """Parse the page's "cited by" list and attach Citation objects to *pub*.

    The four fields are scraped as parallel lists and matched up by index;
    assumes the page lists them in the same order — TODO confirm.

    :param pub: Publication that receives the citations (``pub._citations``)
    :param num_citations: number of citing entries to keep
    :param soup: BeautifulSoup of the article page
    """
    details = soup.find('ol', class_ = 'cited-content_cbyCitation')
    # Trailing periods are stripped from titles, matching the original output.
    titles = [t.text.replace('.', '') for t in details.find_all(
        'span', class_ = 'cited-content_cbyCitation_article-title')]
    journal_names = [n.text for n in details.find_all(
        'span', class_ = 'cited-content_cbyCitation_journal-name')]
    doi_urls = [a.get('href') for a in details.find_all('a')]
    contributors = [c.text for c in details.find_all(
        'span', class_ = 'cited-content_cbyCitation_article-contributors')]
    for i in range(int(num_citations)):
        pub._citations.append(Citation(titles[i], journal_names[i],
                                       contributors[i], doi_urls[i]))
def print_pub_info(pub):
    # Print a human-readable report of the article followed by every
    # citing publication collected in pub._citations.
    print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}
DOI-URL: {pub.doi_url}
Subjects:''')
    # Subjects and contributors are printed comma-separated on one line each.
    print(*(pub.subjects), sep = ", ")
    print('\nContributors:')
    print(*(pub.contributors), sep = ", ")
    print(f'\nThis article is cited by the following {pub.num_citations} publications\n')
    for citation in pub._citations:
        print(f'''
Title: {citation.title}
Journal: {citation.journal}
Contributors: {citation.contributors}
DOI-URL: {citation.doi_url}
''')
def input(url):
    # Fetch an article page and build a fully populated Publication:
    # metadata via get_article_info, then its citing works via
    # get_citation_info.
    # NOTE(review): this shadows the builtin input(); consider renaming
    # (kept as-is because the script-level caller uses this name).
    html_text = req.get(url).text
    soup = bs(html_text, 'html.parser')
    pub = get_article_info(soup)
    # num_citations is scraped as a string; convert for the loop bound.
    get_citation_info(pub, int(pub.num_citations), soup)
    return pub
# Command-line entry point: expects exactly one argument, the article URL.
if len(sys.argv) != 2:
    sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
    # sys.exit instead of the bare exit(): the latter is a site.py
    # convenience and is not guaranteed to exist (e.g. under -S).
    sys.exit(1)
url = sys.argv[1]
pub = input(url)
print_pub_info(pub)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment