Skip to content
Snippets Groups Projects
Commit 85ece1d0 authored by Stahl, Merle's avatar Stahl, Merle
Browse files

Merge remote-tracking branch 'upstream/main' into main

parents 6594b1a7 95e73bd7
No related branches found
No related tags found
2 merge requests!10Output,!9Main
This commit is part of merge request !9. Comments created here will be created in the context of that merge request.
Showing
with 1304 additions and 0 deletions
**/__pycache__/
#!/usr/bin/env python3
from input.interface import InputInterface as Input
def count_journals(url: str):
    """Fetch the publication behind *url* and tally its journal names.

    Increments the module-global ``cit`` mapping (journal name -> count)
    once for every citation and every reference of the publication.

    :param url: doi-url of the publication to analyse
    :type url: str

    NOTE(review): ``cit`` is only defined inside the ``__main__`` guard
    below, so calling this function from another module raises NameError
    — confirm before reusing this outside the script.
    """
    inter = Input()
    pub = inter.get_publication(url)
    # Citations and references expose the same 'journal' attribute,
    # so both groups are folded into one tally with dict.get()
    # (replaces the duplicated if/else counting blocks).
    for group in (pub.citations, pub.references):
        if group:
            for entry in group:
                cit[entry.journal] = cit.get(entry.journal, 0) + 1
if __name__ == "__main__":
    # Shared tally read and written by count_journals().
    cit = {}
    urls = [
        "https://doi.org/10.1021/acs.jcim.1c00203",
        "https://doi.org/10.1021/acs.jcim.6b00561",
        "https://doi.org/10.1021/acs.jcim.6b00613",
        "https://doi.org/10.1021/acs.jcim.1c00917",
        "https://doi.org/10.1021/acs.jmedchem.0c01332",
        # "https://pubs.acs.org/doi/10.1021/acs.biochem.1c00290",
        # "https://pubs.acs.org/doi/10.1021/acsenvironau.1c00007",
        # "https://pubs.acs.org/doi/10.1021/acs.biochem.7b01162",
    ]
    for url in urls:
        count_journals(url)
    # Sort journals by ascending citation count before printing.
    cit = dict(sorted(cit.items(), key=lambda item: item[1]))
    for journal in cit:
        if journal != "":
            print(f'{journal}: {cit[journal]}')
#!/usr/bin/env python3
from input.interface import InputInterface as Input
def main(url: str):
    """Fetch the lightweight publication record for *url* and print it."""
    interface = Input()
    # print(interface.get_publication(url))
    print(interface.get_pub_light(url))
    # print(interface.get_supported_fetchers())  # useless: all fetcher classes share one name
if __name__ == "__main__":
    # Example DOI of a supported ACS publication.
    #main("https://doi.org/10.1021/acs.jcim.1c0023")
    main("https://doi.org/10.1021/acs.jcim.5b00332")
# Projekt CiS-Projekt 2021/22
Input-Package to fetch publication information with a given url.
## Usage/Examples
```python
from input.interface import InputInterface as Input
from input.publication import Publication
def main(url):
inter = Input()
try:
pub = inter.get_publication(url)
except Exception as error:
raise error
print(pub)
pub.title = "Cool new Title"
print(pub)
if __name__ == "__main__":
main("https://doi.org/10.1021/acs.chemrev.8b00728")
```
The expected results of calling this method are:
| Input-Url | Result |
|-----------|-----------|
| supported & correct| A publication Instance |
| supported & incorrect | ValueError|
| not supported | ValueError|
Supported urls are those which comply with the url patterns of the supported journals.
### Supported Journals:
- ACS-Journals
- (Nature-Journals)
## Testing
```bash
python -m unittest input/test/<file.py> -v
# for all tests in directory
python -m unittest discover input/test -v
```
## Authors
- Florian Jochens
- Sam Ockenden
- Julius Schenk
\ No newline at end of file
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication, Citation
class Fetcher(JournalFetcher):
    """
    Specific fetcher for the ACS journals.

    Usage: check the url with 'can_use_url' and then fetch the
    publication with 'get_publication' (or 'get_pub_light' for the
    main article without references/citations).
    """

    # DOI registrant code used by all ACS journals ('10.1021/...').
    SUPPORTED_JOURNALS = ['1021']

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Uses regex to extract the journal specific substring of the doi.

        :param url: url (or bare doi) of a publication
        :return: True if the doi registrant code belongs to ACS
        TODO: Support non Doi-urls
        """
        # Dots in the host parts and the '10.' doi prefix are escaped so
        # they only match literal dots; the trailing '\w+.\S+' is left
        # loose on purpose — older ACS dois contain no dot after the '/'.
        matched_url = re.match(r'^(https?://)?(doi\.org/|pubs\.acs\.org/doi/)?(10\.(\d{4})/\w+.\S+)', url.strip(". \t\r\n"))
        # Checks if match exists (group 4 is the doi registrant code)
        if matched_url is not None:
            return matched_url[4] in Fetcher.SUPPORTED_JOURNALS
        else:
            return False

    @staticmethod
    def get_pub_light(url: str) -> Publication:
        """
        Fetches html and creates a BeautifulSoup-instance in the parent class.
        Specific css-searches for ACS-Journals and creates a Publication-instance
        without references/citations.

        :param url: url of a supported publication
        :raise ValueError: if the url matches the ACS pattern but doesn't link to a paper
        :return: Publication of the main article only
        """
        # Creation of Soup
        try:
            soup = JournalFetcher.get_soup(url)
        except Exception as error:
            raise error

        # Raise Error if re recognizes Pattern, but url isnt correct:
        # For other Urls
        if soup.text.strip(" \t\n") == "Missing resource null":
            raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
        # For Dois
        if soup.title is not None:
            if soup.title.text == "Error: DOI Not Found":
                raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))

        soup_header = soup.select('.article_header')[0]

        # Creates Publication
        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
        title = soup_header.select(".hlFld-Title")[0].text
        contributors = []
        for author in soup_header.select(".hlFld-ContribAuthor"):
            contributors.append(author.text)
        journal = soup_header.select(".cit-title")[0].text
        # Replaces abbreviation with whole name
        if journal in JournalFetcher.abbrev_dict:
            journal = JournalFetcher.abbrev_dict[journal]
        published = soup_header.select(".pub-date-value")[0].text
        subjects = []
        subject_soup = soup_header.select('.article_header-taxonomy')[0]
        for subject in subject_soup.select('a'):
            subjects.append(subject.text)

        return Publication(doi_url, title, contributors, journal, published,
                           subjects)

    # Bug fix: decorated with @staticmethod for consistency with the
    # abstract declaration in JournalFetcher (was a plain function).
    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Fetches html and creates a BeautifulSoup-instance in the parent class.
        Specific css-searches for ACS-Journals and creates a Publication-instance
        including its references and citations.

        :param url: url of a supported publication
        :raise ValueError: if the url matches the ACS pattern but doesn't link to a paper
        :return: Publication with references and citations
        """
        # Creation of Soup
        try:
            soup = JournalFetcher.get_soup(url)
        except Exception as error:
            raise error

        # Raise Error if re recognizes Pattern, but url isnt correct:
        # For other Urls
        if soup.text.strip(" \t\n") == "Missing resource null":
            raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
        # For Dois
        if soup.title is not None:
            if soup.title.text == "Error: DOI Not Found":
                raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))

        soup_header = soup.select('.article_header')[0]
        # Could be used for more specific search
        ref_cit_soup = soup

        # Creates Publication
        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
        title = soup_header.select(".hlFld-Title")[0].text
        contributors = []
        for author in soup_header.select(".hlFld-ContribAuthor"):
            contributors.append(author.text)
        journal = soup_header.select(".cit-title")[0].text
        # Replaces abbreviation with whole name
        if journal in JournalFetcher.abbrev_dict:
            journal = JournalFetcher.abbrev_dict[journal]
        published = soup_header.select(".pub-date-value")[0].text
        subjects = []
        subject_soup = soup_header.select('.article_header-taxonomy')[0]
        for subject in subject_soup.select('a'):
            subjects.append(subject.text)

        # References: every <li> with a '.refDoi' is a linkable paper.
        references = []
        references_soup = ref_cit_soup.select('ol#references')
        if references_soup != []:
            for reference in references_soup[0].select('li'):
                if reference.select('.refDoi') != []:
                    # '.refDoi' text starts with 'DOI: ' — strip the prefix.
                    ref_doi = "https://doi.org/{}".format(reference.select('.refDoi')[0].text.strip()[5:])
                else:
                    # No Doi -> No Paper
                    continue
                ref_title = reference.select('.NLM_article-title')[0].text\
                    if reference.select('.NLM_article-title') != [] else None
                ref_journal = reference.select('i')[0].text\
                    if reference.select('i') != [] else None
                # Replaces abbreviation with whole name
                if ref_journal in JournalFetcher.abbrev_dict:
                    ref_journal = JournalFetcher.abbrev_dict[ref_journal]
                ref_contributors = []
                for author in reference.select('.NLM_contrib-group'):
                    ref_contributors.append(author.text.replace("\n", " ").replace("\r", ""))
                references.append(Citation(ref_doi, ref_title, ref_journal, ref_contributors, cit_type="Reference"))

        # Citations: every <li> with a doi-url link is a linkable paper.
        citations = []
        citation_soup = ref_cit_soup.select('.cited-content_cbyCitation')
        if citation_soup != []:
            for citation in citation_soup[0].select('li'):
                if citation.select('a[title="DOI URL"]') != []:
                    cit_doi = citation.select('a[title="DOI URL"]')[0].text
                else:
                    # No Doi -> No Paper
                    continue
                cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text\
                    if citation.select('.cited-content_cbyCitation_article-title') != [] else None
                cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text\
                    if citation.select('.cited-content_cbyCitation_journal-name') != [] else None
                # Replaces abbreviation with whole name
                if cit_journal in JournalFetcher.abbrev_dict:
                    cit_journal = JournalFetcher.abbrev_dict[cit_journal]
                cit_contributors = []
                cit_contributors = citation.select('.cited-content_cbyCitation_article-contributors')[0]\
                    .text.replace("\n", " ").replace("\r", "").split(', ')
                # clean up of the last Entry (trailing '.' produces an empty split item)
                cit_contributors_last = cit_contributors.pop().strip(". ")
                if cit_contributors_last != '':
                    cit_contributors.append(cit_contributors_last)
                citations.append(Citation(cit_doi, cit_title, cit_journal, cit_contributors, cit_type="Citation"))

        return Publication(doi_url, title, contributors, journal, published
                           , subjects, references, citations)
#!/usr/bin/env python3
"""
Parent class for specific Journal
"""
from abc import ABCMeta, abstractmethod
from bs4 import BeautifulSoup
import requests
from input.publication import Publication
class JournalFetcher(metaclass=ABCMeta):
    """
    Abstract base class for the journal-specific fetcher modules.
    """

    @staticmethod
    def get_soup(url: str) -> BeautifulSoup:
        """
        Retrieves website-html and returns a BeautifulSoup-instance

        Parameters:
        -----------
        :type url: str
        :param url: doi-url to a publication
        :return: BeautifulSoup-instance
        """
        # NOTE(review): requests.get() itself never raises HTTPError
        # (that comes from raise_for_status()), so this handler is
        # effectively inert; connection/timeout errors propagate to the
        # caller unchanged — confirm the intended error policy.
        try:
            req = requests.get(url)
        except requests.exceptions.HTTPError as err:
            raise SystemExit(err)

        return BeautifulSoup(req.content, 'html.parser')

    @staticmethod
    @abstractmethod
    def can_use_url(url: str) -> bool:
        """
        Abstract-function to be implemented in subclass.
        Checks if given url links to a supported journal
        """
        # Bug fix: NotImplementedError (not AttributeError) is the
        # conventional exception for a missing override.
        raise NotImplementedError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url))

    @staticmethod
    @abstractmethod
    def get_publication(url: str) -> Publication:
        """
        Abstract-function to be implemented in subclass.
        Creates a Publication-instance.
        """
        raise NotImplementedError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url))

    # A dictionary which maps journal abbreviations to full journal names.
    abbrev_dict = {
        # Bug fix: 'Nat. Protoc.' is Nature Protocols (the previous value,
        # 'Journal of Natural Products', abbreviates as 'J. Nat. Prod.').
        "Nat. Protoc.": "Nature Protocols",
        "PLoS Comput. Biol.": "PLoS Computational Biology",
        "PLoS One": "PLoS One",
        "Protein Sci.": "Protein Science",
        "J. Am. Chem. Soc.": "Journal of the American Chemical Society",
        "J. Chem. Phys.": "Journal of Chemical Physics",
        "Appl. Sci.": "Applied Science",
        "Comput. Sci. Eng.": "Computing in Science & Engineering",
        "Beilstein J. Org. Chem.": "Beilstein Journal of Organic Chemistry",
        "Biol. Chem.": "Biological Chemistry",
        "Isr. J. Chem.": "Israel Journal of Chemistry",
        "Nat. Methods": "Nature Methods",
        "Proc. Natl. Acad. Sci. U. S. A.": "Proceedings of the National Academy of Sciences of the United States of America",
        "J. Phys. Chem. B": "Journal of Physical Chemistry B",
        "Carbohydr. Res.": "Carbohydrate Research",
        "J. Chem. Theory Comput.": "Journal of Chemical Theory and Computation",
        "J. Mol. Biol.": "Journal of Molecular Biology",
        "Nucleic Acids Res.": "Nucleic Acids Research",
        "J. Comput. Chem.": "Journal of Computational Chemistry",
        "J. Cheminf.": "Journal of Cheminformatics",
        "J. Med. Chem.": "Journal of Medicinal Chemistry",
        "J. Comput.-Aided Mol. Des.": "Journal of Computer-Aided Molecular Design",
        "J. Chem. Inf. Model.": "Journal of Chemical Information and Modeling",
        "Mol. Cell": "Molecular Cell",
        "J. Cell Biolog.": "Journal of Cell Biology",
        "Mol. Cell Biol.": "Molecular and Cellular Biology",
        "J. Cell Sci.": "Journal of Cell Science",
        "Nat. Cell Biol.": "Nature Cell Biology",
        "J. Aerosol Sci. Technol.": "Aerosol Science and Technology",
        "Mol. Biol. Cell": "Molecular Biology of the Cell",
        "Build. Environ.": "Building and Environment",
        "Sci. Rep.": "Scientific Reports",
        "Nat. Chem.": "Nature Chemistry",
        "Nat. Med.": "Nature Medicine",
        "Nat. Commun.": "Nature Communications",
        "Exp. Cell Res.": "Experimental Cell Research",
        "Nat. Chem. Biol.": "Nature Chemical Biology",
    }
\ No newline at end of file
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
    """
    Fetcher for Nature journals; scrapes metadata from the page's <meta> tags.
    """

    # TODO: list of compatible journals
    # NOTE: nature does not use journal names in doi links,
    #       must match by 10.xxxx identifier instead
    SUPPORTED_JOURNALS = []

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Checks if given url links to a supported journal.
        """
        # TODO: match the doi registrant code against SUPPORTED_JOURNALS
        return False

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Creates a Publication-instance from the page's <meta> headers.
        """
        soup = JournalFetcher.get_soup(url)
        head = soup.head

        def _meta(name):
            # Value of the single <meta name=...> tag with that name.
            return head.find(attrs={"name": name}).get("content")

        _doi_url = "https://doi.org/" + _meta("DOI")
        _title = _meta("citation_title")
        _journal = _meta("citation_journal_title")
        _published = _meta("prism.publicationDate")
        _contributors = [tag.get("content")
                         for tag in head.findAll(attrs={"name": "dc.creator"})]
        _subjects = [tag.get("content")
                     for tag in head.findAll(attrs={"name": "dc.subject"})]

        return Publication(_doi_url, _title, _contributors, _journal, _published, _subjects)

        # TODO: exception handling
        # raise ValueException("Cant Fetch: '{}'".format(error))
        # return None
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: None, this is just a template and should be ignored
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
    """
    Template fetcher without any functionality; copy this file to start
    a new journal-specific fetcher.
    """
    # Naming convention:
    #   class: 'Fetcher'
    #   file:  [journal-/organisation-name], format "[a-z]*.py"

    # TODO: list of compatible journals
    SUPPORTED_JOURNALS = []

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Checks if given url links to a supported journal.
        """
        # TODO: check the url for compatibility, e.g.:
        # url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url)
        # if url_re is not None:
        #     return url_re[4] in SUPPORTED_JOURNALS
        return False

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Creates a Publication-instance.
        """
        # TODO: fetch the data from the html and build the instance:
        # soup = JournalFetcher.get_soup(url)
        # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[]
        # return Publication(doi_url, title, contributors, journal,
        #                    publication_date, subjects, references, citations)
        return None
\ No newline at end of file
#!/usr/bin/env python3
"""
Interface for the Input-Package only this should be accessed from outside this Package.
"""
from os import walk
import importlib
import pathlib
import re
from input.publication import Publication
class InputInterface:
    """
    Singleton which dynamically imports and manages the fetcher classes.
    """
    # The one shared instance (class-level).
    instance = None
    # Path to the 'get'-package, set on the first import scan.
    get_path = None
    # All dynamically imported 'Fetcher'-classes (shared, class-level).
    fetcher_classes = []

    # '__new__' is called before '__init__' and gives us an instance
    def __new__(cls, *args, **kwargs):
        # Checks if an instance exists and if it doesnt creates one.
        if cls.instance is None:
            # object.__new__ takes no extra arguments — do not forward them.
            cls.instance = super(InputInterface, cls).__new__(cls)
        return cls.instance

    def __init__(self):
        # Imports all fetcher modules on first construction.
        if self.fetcher_classes == []:
            self.import_fetcher_classes()
        if self.fetcher_classes == []:
            raise AttributeError("No specific Fetchers where found at: '{}'"
                                 .format(self.get_path))

    def get_publication(self, url: str) -> Publication:
        """
        The interface-method to get a Publication-instance
        (including it's citations and references)

        Parameters
        ----------
        :param url: url to a Publication
        :type url: str
        :raise ValueError: if no fetcher supports 'url'
        :return: Publication instance
        """
        # Checks if a fetcher supports the 'url' and
        # returns a Publication if it does.
        for fetcher_class in InputInterface.fetcher_classes:
            if fetcher_class.can_use_url(url):
                return fetcher_class.get_publication(url)
        # No Module for given url was found
        raise ValueError("'{}' is not supported".format(url))

    def get_pub_light(self, url: str) -> Publication:
        """
        The interface-method to get a Publication-instance
        (only for main article)

        Parameters
        ----------
        :param url: url to a Publication
        :type url: str
        :raise ValueError: if no fetcher supports 'url'
        :return: Publication instance
        """
        # Checks if a fetcher supports the 'url' and
        # returns a Publication if it does.
        for fetcher_class in InputInterface.fetcher_classes:
            if fetcher_class.can_use_url(url):
                return fetcher_class.get_pub_light(url)
        # No Module for given url was found
        raise ValueError("'{}' is not supported".format(url))

    def get_supported_fetchers(self):
        """Returns the class names of all imported fetchers."""
        # Currently all classes are called 'Fetcher', so the names
        # are not very distinctive yet.
        return [a.__name__ for a in self.fetcher_classes]

    def import_fetcher_classes(self):
        """
        Searches in 'get', if there are [a-z]*.py modules (specific Fetchers)
        and imports their 'Fetcher'-class into 'fetcher_classes'.

        :raise ImportError: if a module cannot be imported or
                            has no 'Fetcher'-class
        """
        # Path to 'get'-package
        self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve())

        # Searches for modules with the fetcher naming convention.
        # Bug fix: pattern is now anchored and the dot escaped, so e.g.
        # 'acs.pyc' or 'acsXpyz' are no longer picked up.
        fetcher_file_names = []
        for file in next(walk(self.get_path), (None, None, []))[2]:
            if re.match(r'^[a-z]+\.py$', file) is not None:
                fetcher_file_names.append(file)

        # Tries to import those modules and saves their 'Fetcher'-class
        for file in fetcher_file_names:
            try:
                fetcher_module = importlib.import_module("input.get.{}".format(file[:-3]))
            except Exception as error:
                raise ImportError("Module '{}' can not be imported".format(file[:-3])) from error
            try:
                self.fetcher_classes.append(fetcher_module.Fetcher)
            except AttributeError as error:
                # Bug fix: this ImportError was previously created but
                # never raised, silently skipping broken modules.
                raise ImportError("Module '{}' does not have a 'Fetcher'-class".format(file[:-3])) from error
#!/usr/bin/env python3
# this is needed for typing pre python 3.9, this maybe as an large Overhead
from typing import Any, List
class Publication:
    """
    Represents a Publication
    """
    def __init__(self, doi_url: str, title: str
                 , contributors: List[str], journal: str
                 , publication_date: str, subjects: List[str]
                 , references: List[Any] = None, citations: List[Any] = None):
        """
        Parameters
        ----------
        :param doi_url: doi_url of the publication
        :type doi_url: str
        :param title: title of the publication
        :type title: str
        :param contributors: list of all contributors
        :type contributors: List[str]
        :param journal: journal the publication appeared in
        :type journal: str
        :param publication_date: date of release
        :type publication_date: str
        :param subjects: the subjects of the Publication
        :type subjects: List[str]
        :param references: the Citations which are referenced by this Publication
        :type references: List[Any]
        :param citations: the Citations which reference this Publication
        :type citations: List[Any]
        :return: None
        """
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        self.subjects = subjects
        # Mutable defaults are created per instance, never shared.
        if references is None:
            self.references = []
        else:
            self.references = references
        if citations is None:
            self.citations = []
        else:
            self.citations = citations
        # For the 'Verarbeitungsgruppe' (processing group downstream)
        self.group = None

    def __str__(self) -> str:
        return ("Title: {}\n"
                "Doi-url: {}\n"
                "Authors: {}\n"
                "Journal: {}\n"
                "Published on: {}\n"
                "Subjects: {}\n"
                "References: \n{}\n"
                "Citations: \n{}")\
            .format(self.title, self.doi_url, ", ".join(self.contributors)
                    , self.journal, self.publication_date
                    , ", ".join(self.subjects)
                    , "\n".join(self.get_citation_string(self.references))
                    , "\n".join(self.get_citation_string(self.citations)))

    @staticmethod
    def get_citation_string(citations):
        """Returns the str() of every citation, or ["None"] when empty."""
        if citations == []:
            return ["None"]
        else:
            citation_string = []
            for citation in citations:
                citation_string.append(str(citation))
            return citation_string

    def add_citations(self, citation) -> None:
        """
        Appends a list of Citations or a single Citation to self.citations.

        Parameter
        ---------
        :param citation: Citation or Reference of the Publication
        :type citation: Citation or list[Citation]
        :raise TypeError: if 'citation' is neither
        :return: self.citations
        """
        if type(citation) is Citation:
            self.citations.append(citation)
        # Checks if 'citation' is a list of Citations
        elif type(citation) is list:
            for _cit in citation:
                if type(_cit) is Citation:
                    self.citations.append(_cit)
                else:
                    # Bug fix: message referenced nonexistent '_set_citation'.
                    raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
                                    .format(type(_cit)))
        else:
            raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
                            .format(type(citation)))
        return self.citations

    def __eq__(self, other) -> bool:
        """ Compares the unique doi_url of two Publications"""
        if type(self) == type(other):
            return self.doi_url == other.doi_url
        return False

    def __hash__(self):
        # Bug fix: defining __eq__ alone made instances unhashable;
        # hash on the same key __eq__ compares (the unique doi_url).
        return hash(self.doi_url)
class Citation:
    """Record of one reference or citation belonging to a Publication."""

    def __init__(self, doi_url: str, title: str
                 , journal: str, contributors: List[str]
                 , cit_type: str = "Citation"):
        """
        Parameters
        ----------
        :param doi_url: doi_url of the cited publication
        :type doi_url: str
        :param title: title of the cited publication
        :type title: str
        :param journal: journal the cited publication appeared in
        :type journal: str
        :param contributors: list of all contributors
        :type contributors: List[str]
        :param cit_type: specifies if Reference or Citation
        :type cit_type: str
        :return: None
        """
        self.title = title
        self.doi_url = doi_url
        self.journal = journal
        self.contributors = contributors
        self.cit_type = cit_type

    def __str__(self) -> str:
        # Each line is prefixed with the citation type (Reference/Citation).
        prefix = self.cit_type
        parts = [
            "\t{}-Title: {}\n".format(prefix, self.title),
            "\t{}-Doi: {}\n".format(prefix, self.doi_url),
            "\t{}-Journal: {}\n".format(prefix, self.journal),
            "\t{}-Contributors: {}\n".format(prefix, ", ".join(self.contributors)),
        ]
        return "".join(parts)
beautifulsoup4
requests
\ No newline at end of file
This diff is collapsed.
import unittest
from input.get.journal_fetcher import JournalFetcher
from input.interface import InputInterface
from input.publication import Publication
"""
Testing the Publication fetcher
Publication 1: 'https://doi.org/10.1021/acs.jcim.1c00203'
Publication 2: 'doi.org/10.1021/acs.jcim.1c00917'
Publication 3: '10.1038/nchem.1781'
Publication 4: '11.12/jaj'
Publication 5: '11.12/'
Publication 6: 'https://doi.org/10.1021/acs.jmedchem.0c01332' # Paper is a PDF
"""
# TODO: Testcases for:
# - Specific Journals: Inherit from FetcherTestCase
# - interface module-importer (test case)
# - Error detection
# - wrong/no Journal_fetchers
# - wrong urls
# - correct Types in publication
# - Edgecases (i.e. paper as pdf, no connection, etc)
class InterfaceTestCase(unittest.TestCase):
    """Tests the singleton behaviour of InputInterface."""

    def setUp(self):
        # NOTE(review): assumes no InputInterface was created before this
        # test runs — the singleton persists for the process lifetime, so
        # running other suites that construct one first would break this
        # precondition; verify test ordering.
        self.assertEqual(InputInterface.instance, None)
        self.interface = InputInterface()

    def test_singleton(self):
        # interface should already be made in setUp()
        self.assertNotEqual(self.interface.instance, None)
        # A second construction must return the same shared instance.
        new_interface = InputInterface()
        self.assertEqual(self.interface, new_interface)

    # def test_imported_modules(self):
    #     fetchers = self.interface.get_supported_fetchers
class FetcherTestCase(unittest.TestCase):
    """Shared assertions for the journal-specific fetcher test cases."""

    def can_use_url_test(self, fetcher: JournalFetcher, test_url: str, expected_res: bool):
        # Tests the 'can_use_url'-method
        self.assertEqual(fetcher.can_use_url(test_url), expected_res)

    def get_publication_test(self, fetcher: JournalFetcher, test_url: str, expected_res: Publication):
        """
        Asserts that every field of the fetched publication equals the
        expected result.
        """
        actual_res = fetcher.get_publication(test_url)

        # Top-level fields of the publication itself.
        for field in ("doi_url", "title", "contributors", "journal",
                      "publication_date", "subjects"):
            self.assertEqual(getattr(actual_res, field), getattr(expected_res, field))

        # References and citations are compared element-wise
        # (same fields checked for both groups).
        for group in ("references", "citations"):
            actual_group = getattr(actual_res, group)
            expected_group = getattr(expected_res, group)
            self.assertEqual(len(actual_group), len(expected_group))
            for actual_cit, expected_cit in zip(actual_group, expected_group):
                for field in ("doi_url", "journal", "contributors", "cit_type"):
                    self.assertEqual(getattr(actual_cit, field), getattr(expected_cit, field))

    def get_publication_exception_test(self, fetcher: JournalFetcher, test_url: str):
        # Checks that an unusable url raises ValueError.
        with self.assertRaises(ValueError):
            fetcher.get_publication(test_url)
\ No newline at end of file
# Projekt CiS-Projekt 2021/22
Input-Skripts
File added
https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332
https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709
#!/usr/bin/env python3
"""
Functions for information retrieval of articles from the ACS journal JCIM
"""
__author__ = "Florian Jochens"
__email__ = "fj@andaco.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
class Publication:
    """Metadata of one fetched article, plus its citations/references."""
    #_registry = []

    def __init__(self, title, publication_date, contributors, doi_url,
                 subjects=None, num_citations=None):
        """
        :param title: title of the article
        :param publication_date: date of release
        :param contributors: list of all contributors
        :param doi_url: doi-url of the article
        :param subjects: subjects listed on the article page
        :param num_citations: citation count shown on the page
        """
        #self._registry.append(self)
        self.title = title
        self.publication_date = publication_date
        self.contributors = contributors
        self.doi_url = doi_url
        self.subjects = subjects
        self.num_citations = num_citations
        # Bug fix: these used to be class attributes, so all instances
        # shared one citations/references list.
        self._citations = []
        self._references = []
class Citation:
    """One publication that cites the fetched article."""

    def __init__(self, title, journal, contributors, doi_url):
        # Plain value holder; fields are stored exactly as given.
        self.title, self.journal = title, journal
        self.contributors, self.doi_url = contributors, doi_url
class References:
    """One publication referenced by the fetched article."""

    def __init__(self, title, journal, contributors, doi_url):
        # Plain value holder; fields are stored exactly as given.
        self.title, self.journal = title, journal
        self.contributors, self.doi_url = contributors, doi_url
def get_article_info(soup):
    """Builds a Publication from the header section of an ACS article page.

    :param soup: BeautifulSoup of a full ACS article page
    :return: Publication with title, date, authors, doi-url, subjects and
             the citation count shown on the page
    """
    header = soup.find('div', class_ = 'article_header-left pull-left')
    article_title = header.find('span', class_ = 'hlFld-Title').text
    publication_date = header.find('span', class_ = 'pub-date-value').text
    # Keeps the href of the last link in the doiurl div as the doi-url.
    for link in header.find('div', class_ = 'article_header-doiurl'):
        doi_url = link.get('href')
    subs = header.find('div', class_ = 'article_header-taxonomy')
    subjects = []
    for sub in subs.find_all('a'):
        subjects.append(sub.get('title'))
    cons = header.find('ul', class_ = 'loa')
    contributors = []
    for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'):
        contributors.append(con.text)
    numc = header.find('div', class_ = 'articleMetrics_count')
    # No link inside the metrics block means the article is uncited.
    if not numc.a:
        num_citations = 0
    else:
        # NOTE(review): this is the link text (a string), while the
        # uncited branch stores the int 0 — callers int() it later.
        num_citations = numc.a.text
    pub = Publication(article_title, publication_date, contributors, doi_url,
                      subjects, num_citations)
    return pub
def get_download_url(soup):
    """Returns the ACS export url for the 'Citation and references' file.

    Bug fix: 'soup' used to be read from an undefined global (NameError
    on every call); it is now an explicit parameter.

    :param soup: BeautifulSoup of a full ACS article page
    :return: absolute download url (also printed to stdout)
    """
    export = soup.find('div', class_ = 'cit-download-dropdown_content')
    url = 'https://pubs.acs.org'
    for link in export.find_all('a'):
        if link.get('title') == 'Citation and references':
            url += link.get('href')
    print(url)
    return url
def download(url):  # Download citation and references file
    """Checks whether the citation/reference file behind *url* already
    exists under './files/'.

    Bug fix: the original used 'if url.find("="):', which is truthy for
    -1 (no '=' present) and then crashed on the rsplit indexing; urls
    without '=' are now a silent no-op.

    :param url: export url whose part after the last '=' is the filename
    """
    if '=' in url:
        filename = url.rsplit('=', 1)[1]
        path = Path('./files/' + filename)
        if path.is_file():
            print("File already exists")
        else:
            print("File does not exist")
def get_citation_info(pub, num_citations, soup):
    """Fills pub._citations with the articles citing this publication.

    :param pub: Publication the citations are attached to
    :param num_citations: number of citations shown on the page
    :param soup: BeautifulSoup of the full ACS article page

    NOTE(review): titles, journal names, doi-urls and contributors are
    collected in four independent lists and matched by index below;
    'find_all("a")' collects every link in the citation list, so the
    i-th url is assumed to belong to the i-th citation — verify this
    holds for the actual page layout.
    """
    pub._citations = []
    details = soup.find('ol', class_ = 'cited-content_cbyCitation')
    titles = []
    for title in details.find_all('span',
                                  class_ = 'cited-content_cbyCitation_article-title'):
        # Dots are stripped from the title text.
        titles.append(title.text.replace('.', ''))
    journal_names = []
    for name in details.find_all('span',
                                 class_ = 'cited-content_cbyCitation_journal-name'):
        journal_names.append(name.text)
    doi_urls = []
    for url in details.find_all('a'):
        doi_urls.append(url.get('href'))
    contributors = []
    for contrib in details.find_all('span',
                                    class_ = 'cited-content_cbyCitation_article-contributors'):
        contributors.append(contrib.text)
    # Builds one Citation per reported citation, matched by index.
    for i in range(0, int(num_citations)):
        pub._citations.append(Citation(titles[i], journal_names[i],
                                       contributors[i], doi_urls[i]))
def print_pub_info(pub):
    """Pretty-prints a Publication and its citing articles to stdout.

    :param pub: Publication (with _citations filled when cited)
    """
    print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}
DOI-URL: {pub.doi_url}
Subjects:''')
    print(*(pub.subjects), sep = ", ")
    print('\nContributors:')
    print(*(pub.contributors), sep = ", ")
    # num_citations may be the int 0 or a digit string from the page.
    if int(pub.num_citations) > 0:
        if int(pub.num_citations) == 1:
            print(f'\nThis publication is cited by the following publication:\n')
        else:
            print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n')
        for citation in pub._citations:
            print(f'''
Title: {citation.title}
Journal: {citation.journal}
Contributors: {citation.contributors}
DOI-URL: {citation.doi_url}
''')
    else:
        print('\nThis publication is not cited by any other publication.')
def input(url):
    """Fetches an ACS article page and returns it as a Publication.

    NOTE(review): shadows the builtin 'input' within this module.

    :param url: url of the ACS article
    :return: Publication, including citations when the page lists any
    """
    html_text = req.get(url).text
    soup = bs(html_text, 'html.parser')
    pub = get_article_info(soup)
    # Only scrape the citation list when the page reports citations.
    if int(pub.num_citations) > 0:
        get_citation_info(pub, int(pub.num_citations), soup)
    return pub
#if len(sys.argv) != 2:
# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
# exit(1)
#url = sys.argv[1]
#pub = input(url)
#print_pub_info(pub)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment