Skip to content
Snippets Groups Projects
Commit 30836634 authored by Florian Jochens's avatar Florian Jochens
Browse files

added get_pub_light function

    only fetches information for the given article and not its
citations and references
parent 86ab6da4
Branches
No related tags found
No related merge requests found
......@@ -4,9 +4,10 @@ from input.interface import InputInterface as Input
def main(url: str) -> None:
    """
    Fetches the publication behind 'url' and prints it.

    Only the light variant is requested, i.e. the main article without
    its citations and references. Use Input.get_publication instead of
    Input.get_pub_light to include them.

    :param url: url to a Publication
    :type url: str
    """
    interface = Input()
    # The former full fetch (get_publication) was replaced by the light
    # one; running both would fetch and print the article twice.
    print(interface.get_pub_light(url))
if __name__ == "__main__":
    # Alternative url, currently disabled:
    # main("https://doi.org/10.1021/acs.jcim.1c0023")
    # Run once only; a repeated identical call would fetch and print the
    # same article a second time.
    main("https://doi.org/10.1021/acs.jcim.5b00332")
......@@ -35,6 +35,58 @@ class Fetcher(JournalFetcher):
return False
@staticmethod
def get_pub_light(url: str) -> Publication:
    """
    Fetches the html via the parent class and creates a Publication-instance
    for the main article only.

    Only the article header is parsed with ACS-specific css-searches;
    no citations or references are collected here.

    Parameters
    ----------
    :param url: url to an ACS publication
    :type url: str

    :return: Publication built from doi-url, title, contributors, journal,
             publication date and subjects
    :raises ValueError: if the url matches the ACS pattern but the fetched
                        page is not an article page
    """
    # Creation of Soup; any error raised while fetching propagates unchanged.
    soup = JournalFetcher.get_soup(url)

    not_a_paper = ("'{}' matches Pattern for 'ACS', "
                   "but doesnt link to Paper.").format(url)

    # Raise Error if re recognizes Pattern, but url isnt correct:
    # For other Urls
    if soup.text.strip(" \t\n") == "Missing resource null":
        raise ValueError(not_a_paper)
    # For Dois
    if soup.title is not None and soup.title.text == "Error: DOI Not Found":
        raise ValueError(not_a_paper)

    # Without an article header there is nothing to parse; report it like
    # the other "not a paper" cases instead of failing with an IndexError.
    headers = soup.select('.article_header')
    if not headers:
        raise ValueError(not_a_paper)
    soup_header = headers[0]

    # Collects the fields of the Publication
    doi_url = soup_header.select('a[title="DOI URL"]')[0].string
    title = soup_header.select(".hlFld-Title")[0].text
    contributors = [author.text
                    for author in soup_header.select(".hlFld-ContribAuthor")]

    journal = soup_header.select(".cit-title")[0].text
    # Replaces abbreviation with whole name
    if journal in JournalFetcher.abbrev_dict:
        journal = JournalFetcher.abbrev_dict[journal]

    published = soup_header.select(".pub-date-value")[0].text

    # A missing taxonomy block is treated as "no subjects" rather than
    # aborting the whole fetch with an IndexError.
    taxonomy = soup_header.select('.article_header-taxonomy')
    if taxonomy:
        subjects = [subject.text for subject in taxonomy[0].select('a')]
    else:
        subjects = []

    return Publication(doi_url, title, contributors, journal, published,
                       subjects)
def get_publication(url: str) -> Publication:
"""
Fetches html and creates Beatifulsoup-instance in parent class.
......
......@@ -41,6 +41,7 @@ class InputInterface:
def get_publication(self, url: str) -> Publication:
"""
The interface-method to get a Publication-instance
(including its citations and references)
Parameters
----------
......@@ -49,7 +50,8 @@ class InputInterface:
:return: Publication instance or None if not supported
"""
# Checks if module supports the 'url' and returns a Publication if it does.
# Checks if module supports the 'url' and
# returns a Publication if it does.
for fetcher_class in InputInterface.fetcher_classes:
if fetcher_class.can_use_url(url):
return fetcher_class.get_publication(url)
......@@ -57,8 +59,30 @@ class InputInterface:
# No Module for given url was found
raise ValueError("'{}' is not supported".format(url))
def get_pub_light(self, url: str) -> Publication:
    """
    The interface-method to get a Publication-instance
    (only for main article)

    Parameters
    ----------
    :param url: url to a Publication
    :type url: str

    :return: Publication instance created by the first fetcher class
             that accepts the url
    :raises ValueError: if no fetcher class supports the url
    """
    # Picks the first fetcher class that can handle the 'url'.
    responsible = next(
        (candidate for candidate in InputInterface.fetcher_classes
         if candidate.can_use_url(url)),
        None,
    )

    # No Module for given url was found
    if responsible is None:
        raise ValueError("'{}' is not supported".format(url))

    return responsible.get_pub_light(url)
def get_supported_fetchers(self):
    """
    Lists the class names of all fetcher classes known to this instance.

    :return: list with the '__name__' of every entry in 'fetcher_classes'
    """
    # NOTE: according to the original author all fetcher classes are
    # currently called the same, so the names do not tell them apart yet.
    names = []
    for fetcher_class in self.fetcher_classes:
        names.append(fetcher_class.__name__)
    return names
def import_fetcher_classes(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment