Skip to content
Snippets Groups Projects
Commit 1868d958 authored by Jochens, Florian's avatar Jochens, Florian
Browse files

Merge branch 'main' into 'main'

input

See merge request !1
parents e5604131 433bf784
No related branches found
No related tags found
1 merge request!1input
# Projekt CiS-Projekt 2021/22
Input-Skripts
#!/usr/bin/env python3
"""
Functions for information retrieval of articles from the ACS journal JCIM
"""
__author__ = "Florian Jochens"
__email__ = "fj@andaco.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
class Publication:
#_registry = []
_citations = []
def __init__(self, title, publication_date, contributors, doi_url,
subjects, num_citations):
#self._registry.append(self)
self.title = title
self.publication_date = publication_date
self.contributors = contributors
self.doi_url = doi_url
self.subjects = subjects
self.num_citations = num_citations
class Citation:
def __init__(self, title, journal, contributors, doi_url):
self.title = title
self.journal = journal
self.contributors = contributors
self.doi_url = doi_url
def get_article_info(soup):
header = soup.find('div', class_ = 'article_header-left pull-left')
article_title = header.find('span', class_ = 'hlFld-Title').text
publication_date = header.find('span', class_ = 'pub-date-value').text
for link in header.find('div', class_ = 'article_header-doiurl'):
doi_url = link.get('href')
subs = header.find('div', class_ = 'article_header-taxonomy')
subjects = []
for sub in subs.find_all('a'):
subjects.append(sub.get('title'))
cons = header.find('ul', class_ = 'loa')
contributors = []
for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'):
contributors.append(con.text)
numc = header.find('div', class_ = 'articleMetrics_count')
if not numc.a:
num_citations = 0
else:
num_citations = numc.a.text
pub = Publication(article_title, publication_date, contributors, doi_url,
subjects, num_citations)
return pub
def get_download_url():
export = soup.find('div', class_ = 'cit-download-dropdown_content')
url = 'https://pubs.acs.org'
for link in export.find_all('a'):
if link.get('title') == 'Citation and references':
url += link.get('href')
print(url)
return url
def download(url): # Download citation and references file
if url.find('='):
filename = url.rsplit('=', 1)[1]
path = Path(('./files/' + filename))
if path.is_file():
print("File already exists")
else:
print("File does not exist")
def get_citation_info(pub, num_citations, soup):
details = soup.find('ol', class_ = 'cited-content_cbyCitation')
titles = []
for title in details.find_all('span',
class_ = 'cited-content_cbyCitation_article-title'):
titles.append(title.text.replace('.', ''))
journal_names = []
for name in details.find_all('span',
class_ = 'cited-content_cbyCitation_journal-name'):
journal_names.append(name.text)
doi_urls = []
for url in details.find_all('a'):
doi_urls.append(url.get('href'))
contributors = []
for contrib in details.find_all('span',
class_ = 'cited-content_cbyCitation_article-contributors'):
contributors.append(contrib.text)
for i in range(0, int(num_citations)):
pub._citations.append(Citation(titles[i], journal_names[i],
contributors[i], doi_urls[i]))
def print_pub_info(pub):
print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}
DOI-URL: {pub.doi_url}
Subjects:''')
print(*(pub.subjects), sep = ", ")
print('\nContributors:')
print(*(pub.contributors), sep = ", ")
if int(pub.num_citations) > 0:
if int(pub.num_citations) == 1:
print(f'\nThis publication is cited by the following publication:\n')
else:
print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n')
for citation in pub._citations:
print(f'''
Title: {citation.title}
Journal: {citation.journal}
Contributors: {citation.contributors}
DOI-URL: {citation.doi_url}
''')
else:
print('\nThis publication is not cited by any other publication.')
def input(url):
html_text = req.get(url).text
soup = bs(html_text, 'html.parser')
pub = get_article_info(soup)
if int(pub.num_citations) > 0:
get_citation_info(pub, int(pub.num_citations), soup)
return pub
#if len(sys.argv) != 2:
# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
# exit(1)
#url = sys.argv[1]
#pub = input(url)
#print_pub_info(pub)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment