Skip to content
Snippets Groups Projects
Commit 30836634 authored by Florian Jochens's avatar Florian Jochens
Browse files

added get_pub_light function

    only fetches information for the given article and not it's
    citations and references
parent 86ab6da4
No related branches found
No related tags found
No related merge requests found
......@@ -4,9 +4,10 @@ from input.interface import InputInterface as Input
def main(url: str):
i = Input()
print(i.get_publication(url))
#print(i.get_publication(url))
print(i.get_pub_light(url))
# print(i.get_supported_fetchers()) Useless because all classes are called the same
if __name__ == "__main__":
#main("https://doi.org/10.1021/acs.jcim.1c0023")
main("https://doi.org/10.1021/acs.jcim.5b00332")
main("https://doi.org/10.1021/acs.jcim.5b00332")
......@@ -35,6 +35,58 @@ class Fetcher(JournalFetcher):
return False
@staticmethod
def get_pub_light(url: str) -> Publication:
"""
Fetches html and creates Beatifulsoup-instance in parent class.
Specific css-searches for ACS-Journals and creates Publication-instance.
"""
# Creation of Soup
try:
soup = JournalFetcher.get_soup(url)
except Exception as error:
raise error
# Raise Error if re recognizes Pattern, but url isnt correct:
# For other Urls
if soup.text.strip(" \t\n")=="Missing resource null":
raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
# For Dois
if soup.title is not None:
if soup.title.text == "Error: DOI Not Found":
raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
soup_header = soup.select('.article_header')[0]
# Creates Publication
doi_url = soup_header.select('a[title="DOI URL"]')[0].string
title = soup_header.select(".hlFld-Title")[0].text
contributors = []
for author in soup_header.select(".hlFld-ContribAuthor"):
contributors.append(author.text)
journal = soup_header.select(".cit-title")[0].text
# Replaces abbreviation with whole name
if journal in JournalFetcher.abbrev_dict:
journal = JournalFetcher.abbrev_dict[journal]
published = soup_header.select(".pub-date-value")[0].text
subjects = []
subject_soup = soup_header.select('.article_header-taxonomy')[0]
for subject in subject_soup.select('a'):
subjects.append(subject.text)
return Publication(doi_url, title, contributors, journal, published,
subjects)
def get_publication(url: str) -> Publication:
"""
Fetches html and creates Beatifulsoup-instance in parent class.
......
......@@ -41,6 +41,7 @@ class InputInterface:
def get_publication(self, url: str) -> Publication:
"""
The interface-method to get a Publication-instance
(including it's citations and references)
Parameters
----------
......@@ -49,7 +50,8 @@ class InputInterface:
:return: Publication instance or None if not supported
"""
# Checks if module supports the 'url' and returns a Publication if it does.
# Checks if module supports the 'url' and
# returns a Publication if it does.
for fetcher_class in InputInterface.fetcher_classes:
if fetcher_class.can_use_url(url):
return fetcher_class.get_publication(url)
......@@ -57,8 +59,30 @@ class InputInterface:
# No Module for given url was found
raise ValueError("'{}' is not supported".format(url))
def get_pub_light(self, url: str) -> Publication:
"""
The interface-method to get a Publication-instance
(only for main article)
Parameters
----------
:param url: url to a Publication
:type url: str
:return: Publication instance or None if not supported
"""
# Checks if module supports the 'url' and
# returns a Publication if it does.
for fetcher_class in InputInterface.fetcher_classes:
if fetcher_class.can_use_url(url):
return fetcher_class.get_pub_light(url)
# No Module for given url was found
raise ValueError("'{}' is not supported".format(url))
def get_supported_fetchers(self):
# print(self.fetcher_classes[0].__name__) Useless right now, because all classes are called the same
# print(self.fetcher_classes[0].__name__) Useless right now,
# because all classes are called the same
return [a.__name__ for a in self.fetcher_classes]
def import_fetcher_classes(self):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment