added get_pub_light function

only fetches information for the given article and not its citations and references

added get_pub_light function
30836634 · Florian Jochens · 86ab6da4 · 30836634 · 30836634 · 30836634
Commit 30836634 authored 3 years ago by Florian Jochens
--- a/example_input.py
+++ b/example_input.py
@@ -4,9 +4,10 @@ from input.interface import InputInterface as Input

 def main(url: str):
    i = Input()
-    print(i.get_publication(url))
+    #print(i.get_publication(url))
+    print(i.get_pub_light(url))
    # print(i.get_supported_fetchers()) Useless because all classes are called the same

 if __name__ == "__main__":
 	#main("https://doi.org/10.1021/acs.jcim.1c0023")
-	main("https://doi.org/10.1021/acs.jcim.5b00332")
+    main("https://doi.org/10.1021/acs.jcim.5b00332")
--- a/input/get/acs.py
+++ b/input/get/acs.py
@@ -35,6 +35,58 @@ class Fetcher(JournalFetcher):
            return False

    @staticmethod
+
+
+    def get_pub_light(url: str) -> Publication:
+        """
+        Fetches the html and creates a BeautifulSoup instance via the parent class.
+        Runs ACS-specific css searches and creates a Publication instance.
+        """
+
+        # Creation of Soup
+        try:
+            soup = JournalFetcher.get_soup(url)
+        except Exception as error:
+            raise error
+        
+        # Raise an error if the regex recognizes the pattern but the url isn't correct:
+        #   For other Urls
+        if soup.text.strip(" \t\n")=="Missing resource null":
+            raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
+
+        #   For Dois
+        if soup.title is not None:
+            if soup.title.text == "Error: DOI Not Found":
+                raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
+
+        
+        soup_header = soup.select('.article_header')[0]
+        
+        # Creates Publication
+        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
+        title = soup_header.select(".hlFld-Title")[0].text
+
+        contributors = []
+        for author in soup_header.select(".hlFld-ContribAuthor"):
+            contributors.append(author.text)
+
+        journal = soup_header.select(".cit-title")[0].text
+
+        # Replaces abbreviation with whole name
+        if journal in JournalFetcher.abbrev_dict:
+            journal = JournalFetcher.abbrev_dict[journal]
+                
+
+        published = soup_header.select(".pub-date-value")[0].text
+
+        subjects = []
+        subject_soup = soup_header.select('.article_header-taxonomy')[0]
+        for subject in subject_soup.select('a'):
+            subjects.append(subject.text)
+
+        return Publication(doi_url, title, contributors, journal, published, 
+                           subjects)
+
    def get_publication(url: str) -> Publication:
        """
        Fetches html and creates Beatifulsoup-instance in parent class.

--- a/input/interface.py
+++ b/input/interface.py
@@ -41,6 +41,7 @@ class InputInterface:
    def get_publication(self, url: str) -> Publication:
        """
        The interface-method to get a Publication-instance
+        (including its citations and references)

        Parameters
        ----------
@@ -49,7 +50,8 @@ class InputInterface:
        :return: Publication instance or None if not supported
        """
        
-        # Checks if module supports the 'url' and returns a Publication if it does.
+        # Checks if module supports the 'url' and 
+        # returns a Publication if it does.
        for fetcher_class in InputInterface.fetcher_classes:
            if fetcher_class.can_use_url(url):
                return fetcher_class.get_publication(url)
@@ -57,8 +59,30 @@ class InputInterface:
        # No Module for given url was found
        raise ValueError("'{}' is not supported".format(url))
        
+    def get_pub_light(self, url: str) -> Publication:
+        """
+        The interface-method to get a Publication-instance 
+        (only for main article)
+
+        Parameters
+        ----------
+        :param url: url to a Publication
+        :type url: str
+        :return: Publication instance (a ValueError is raised if not supported)
+        """
+        
+        # Checks if module supports the 'url' and 
+        # returns a Publication if it does.
+        for fetcher_class in InputInterface.fetcher_classes:
+            if fetcher_class.can_use_url(url):
+                return fetcher_class.get_pub_light(url)
+            
+        # No Module for given url was found
+        raise ValueError("'{}' is not supported".format(url))
+    
    def get_supported_fetchers(self):
-        # print(self.fetcher_classes[0].__name__) Useless right now, because all classes are called the same
+        # print(self.fetcher_classes[0].__name__) Useless right now, 
+        # because all classes are called the same
        return [a.__name__ for a in self.fetcher_classes]

    def import_fetcher_classes(self):