Skip to content
Snippets Groups Projects
Commit 2e5dfde9 authored by Katja's avatar Katja
Browse files

Merge remote-tracking branch 'upstream/main' into main

parents 059e8e0b 5701ab3d
Branches
No related tags found
No related merge requests found
Showing
with 1058 additions and 0 deletions
**/__pycache__/
import base64
import re
import dash
from dash import dcc
from dash import html
from dash import callback_context
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate
from input.interface import InputInterface
import input.publication
app = dash.Dash(__name__)

# List of options when inputting data and generating the graph
additional_options = ['Update Automatically','Smart Input']

# Reads the contents of info_box.txt once at start-up.
# They can later be displayed by pressing the corresponding button.
# A context manager guarantees the handle is closed even on error, and
# utf-8 is given explicitly because the file contains non-ASCII (German) text.
with open('info_box.txt', 'r', encoding='utf-8') as f:
    boxcontent = f.read()
# Top-level layout: the UI is organised in "layers" stacked vertically.
app.layout = html.Div([
    # Layer 0: For the Header and Help Function(s)
    html.Div([
        # Toggles the info box below (handled by the show_hide_info_box callback).
        html.Button(id='show-info',children='Show Info',n_clicks=0),
        html.Div(id='info-box')
    ]),
    # Layer 1: For all mandatory Inputs
    html.Div([
        "Input: ",
        # A simple box for inputting a string.
        # Value is transmitted upon pressing return or clicking out of the box.
        dcc.Input(id='input-string', value='', type='text',debounce=True),
        # Forward recursion. Values between 1 and 10 can be entered.
        dcc.Input(id='forward-depth',value='1',type='number',min='1',max='10'),
        # Backward recursion. Values between 1 and 10 can be entered.
        dcc.Input(id='backward-depth',value='1',type='number',min='1',max='10'),
        # Upload box. Can be used via drag-and-drop or by clicking on it
        # to open a file viewer.
        dcc.Upload(
            id="upload-data",
            children=html.Div(
                ["Drag and drop or click to select a file to upload."]),
            style={
                "width": "30%",
                "height": "60px",
                "lineHeight": "60px",
                "borderWidth": "1px",
                "borderStyle": "dashed",
                "borderRadius": "5px",
                "textAlign": "center",
                "margin": "10px",
            })
    ]),
    # Layer 2: For the checklist, Remove-/Start-Buttons and input-error-message
    html.Div([
        # All input DOIs are collected in this checklist.
        # It is initialized to avoid error messages.
        dcc.Checklist(id='input-checklist',options=[],
            labelStyle = dict(display='block'),value=[]),
        # Displays an error message if 'Smart Input' is active and a DOI fails.
        html.Div(id='input-err',style={'color':'red'}),
        # Clears the entire list.
        html.Button(id='clear-all-button',children='Clear All'),
        # Clears all selected elements.
        html.Button(id='clear-selected-button',children='Clear Selected'),
        # Starts the process that generates a graph.
        html.Button(id='start-button',children='Generate Graph')
    ]),
    # Layer 3: For additional Options (e.g. Topological Sort)
    html.Div([
        html.H4('Additional Options'),
        # A checklist of all additional options that are listed above.
        dcc.Checklist(id='additional-options',
            options=[{'label':k,'value':k} for k in additional_options],
            value=[])
    ]),
    # Layer 4: For the Graph
    html.Div([
        # Placeholder target filled by the generate_output callback.
        html.Div(id='test-output')
    ])
])
def _new_checklist_entry(input_value, additional_options):
    """
    Builds one checklist entry (dict with 'label' and 'value') for a DOI.

    With 'Smart Input' active the DOI is validated via the InputInterface and
    a readable label (first author, journal, publication date) is used; the
    lookup may raise, which the caller turns into an error message. Without
    'Smart Input' the raw input is used unvalidated as its own label.
    """
    if 'Smart Input' in additional_options:
        # Raises if the DOI cannot be resolved; handled by the caller.
        pub = InputInterface().get_pub_light(input_value)
        rep_str = pub.contributors[0] + ',' + pub.journal + \
            ',' + pub.publication_date
        return {'label': rep_str, 'value': input_value}
    return {'label': input_value, 'value': input_value}


@app.callback(
    Output('input-checklist','options'),
    Output('input-checklist','value'),
    Output('input-string','value'),
    Output('input-err','children'),
    Input('input-string','value'),
    Input('clear-all-button','n_clicks'),
    Input('clear-selected-button','n_clicks'),
    Input('upload-data','contents'),
    State('input-checklist','options'),
    State('input-checklist','value'),
    State('additional-options','value')
)
def update_input_checklist(input_value, btn1, btn2, filecontents, all_inputs,
                           selected_inputs, additional_options):
    '''
    Most important callback function. Updates the checklist that holds all inputs.

    The state of the checklist is needed so that previous entries are re-added.
    input-string is required as Output to clear the input box after each input.
    Different actions are performed depending on which Input triggered the
    callback. The value-attribute of input-checklist must be updated so that
    the values of deleted elements no longer appear in the list of selected
    elements.

    :param input_value: given by dcc.Input
    :type input_value: string
    :param btn1: signals pressing of clear-all-button
    :type btn1: int
    :param btn2: signals pressing of clear-selected-button
    :type btn2: int
    :param filecontents: the contents of an uploaded file
    :type filecontents: bit-string
    :param all_inputs: all labels and values from the checklist,
        regardless if they have been checked or not
    :type all_inputs: list of dictionaries with 2 entries each
    :param selected_inputs: values of all checked elements
    :type selected_inputs: list of strings
    :param additional_options: all checked additional options
    :type additional_options: list of strings
    '''
    # changed_id is used to determine which Input has triggered the callback
    changed_id = [p['prop_id'] for p in callback_context.triggered][0]
    # if clear-all-button was pressed:
    if 'clear-all-button' in changed_id:
        return [], [], '', ''
    # if clear-selected-button was pressed:
    if 'clear-selected-button' in changed_id:
        all_inputs = [i for i in all_inputs if i['value'] not in selected_inputs]
        return all_inputs, [], '', ''
    # when a new element is added via dcc.Input
    if 'input-string' in changed_id:
        # List of previously added values, to make sure nothing is added twice.
        curr_values = [x['value'] for x in all_inputs]
        if input_value not in curr_values:
            try:
                all_inputs.append(
                    _new_checklist_entry(input_value, additional_options))
            except Exception as err:
                # BUG FIX: previously returned the undefined name 'options'
                # here, raising a NameError instead of showing the message.
                return all_inputs, selected_inputs, '', '{}'.format(err)
        return all_inputs, selected_inputs, '', ''
    # when a txt-file is uploaded
    if 'upload-data.contents' in changed_id:
        if filecontents:
            # Skips the info portion ("data:...;base64,") that is prepended
            # when a file is uploaded, then decodes the payload.
            found = base64.b64decode(re.search(',(.+?)$', filecontents).group(1))
            text = found.decode('utf-8')
            # Creates a list of inputs by splitting the lines
            list_of_inputs = text.strip().split('\n')
            curr_values = [x['value'] for x in all_inputs]
            # For every line the same actions as for a single input are performed
            for input_value in list_of_inputs:
                if input_value not in curr_values:
                    try:
                        all_inputs.append(
                            _new_checklist_entry(input_value, additional_options))
                    except Exception as err:
                        # Entries added before the failure are kept.
                        return all_inputs, selected_inputs, '', '{}'.format(err)
            return all_inputs, selected_inputs, '', ''
    # when the program is first started:
    # if this is not done, the input_checklist would be generated
    # with one element that contains an empty string
    if input_value == '':
        return [], [], '', ''
    # No recognised trigger: keep the current state instead of
    # implicitly returning None (which Dash reports as an error).
    raise PreventUpdate
@app.callback(
    Output('info-box','children'),
    Input('show-info','n_clicks')
)
def show_hide_info_box(n_clicks):
    '''
    Toggles the info box: an odd number of clicks shows the text loaded
    from info_box.txt at start-up, an even number hides it again.

    :param n_clicks: number of times show-info has been clicked.
    :type n_clicks: int
    '''
    if n_clicks % 2 == 1:
        return html.Div(boxcontent, style={'whiteSpace': 'pre-line'})
    return ''
@app.callback(
    Output('test-output','children'),
    Input('start-button','n_clicks'),
    Input('input-checklist','options'),
    Input('input-checklist','value'),
    Input('forward-depth','value'),
    Input('backward-depth','value'),
    State('additional-options','value')
)
def generate_output(n_clicks, all_inputs, selected_inputs,
                    forward_depth, backward_depth, additional_options):
    '''
    Basic structure for a callback that generates an output. This is only a
    proof of concept and has nothing to do with the intended output yet.

    :param n_clicks: how often has Generate Graph been clicked
    :type n_clicks: int
    :param all_inputs: all labels and values from the checklist,
        regardless if they have been checked or not
    :type all_inputs: list of dictionaries with 2 entries each
    :param selected_inputs: values of all checked elements
    :type selected_inputs: list of strings
    :param forward_depth: forward recursion depth
    :type forward_depth: unsigned int
    :param backward_depth: backward recursion depth
    :type backward_depth: unsigned int
    :param additional_options: value of all selected additional options
    :type additional_options: list of strings
    '''
    changed_id = [p['prop_id'] for p in callback_context.triggered][0]
    # Never produce output before the button has existed at least once.
    if n_clicks is None:
        raise PreventUpdate
    # Only react to an explicit button press unless auto-update is on.
    if 'Update Automatically' not in additional_options \
            and 'start-button' not in changed_id:
        raise PreventUpdate
    # Proof-of-concept output: repeat each value depending on the depths.
    pieces = []
    for entry in all_inputs:
        value = entry['value']
        if value in selected_inputs:
            pieces.append(value * abs(int(forward_depth) - int(backward_depth)))
        else:
            pieces.append(value * (int(forward_depth) + int(backward_depth)))
    return ''.join(pieces)
if __name__ == '__main__':
    # Run the Dash development server (debug=True enables hot reloading).
    app.run_server(debug=True)
#!/usr/bin/env python3
from input.interface import InputInterface as Input
def count_journals(url: str):
    """
    Tallies the journal of every citation and reference of a publication.

    NOTE(review): this mutates the module-level dict ``cit`` (created in the
    __main__ block below); calling it before ``cit`` exists raises a
    NameError — TODO consider passing the tally dict explicitly.

    :param url: doi-url of the publication to analyse
    :type url: str
    """
    inter = Input()
    pub = inter.get_publication(url)
    # Citations and references are tallied in exactly the same way.
    for linked in (pub.citations or [], pub.references or []):
        for entry in linked:
            journal = entry.journal
            cit[journal] = cit.get(journal, 0) + 1
if __name__ == "__main__":
    # Global tally shared with count_journals: journal name -> count.
    cit = {}
    count_journals("https://doi.org/10.1021/acs.jcim.1c00203")
    count_journals("https://doi.org/10.1021/acs.jcim.6b00561")
    count_journals("https://doi.org/10.1021/acs.jcim.6b00613")
    count_journals("https://doi.org/10.1021/acs.jcim.1c00917")
    count_journals("https://doi.org/10.1021/acs.jmedchem.0c01332")
    #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.1c00290")
    #count_journals("https://pubs.acs.org/doi/10.1021/acsenvironau.1c00007")
    #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.7b01162")
    # Sort the tally ascending by occurrence count before printing.
    cit = dict(sorted(cit.items(), key=lambda item: item[1]))
    for journal in cit:
        # Entries without a journal name are skipped.
        if journal != "":
            print(f'{journal}: {cit[journal]}')
#!/usr/bin/env python3
from input.interface import InputInterface as Input
def main(url: str):
    """Fetches the publication behind *url* (light version) and prints it."""
    interface = Input()
    #print(interface.get_publication(url))
    print(interface.get_pub_light(url))
    # print(interface.get_supported_fetchers()) Useless because all classes are called the same
if __name__ == "__main__":
    # First url is commented out (truncated DOI, useful to test error handling).
    #main("https://doi.org/10.1021/acs.jcim.1c0023")
    main("https://doi.org/10.1021/acs.jcim.5b00332")
English
Show Info: Can be activated and deactivated by clicking on the button.
Input: input by entering a DOI ("Digital Object Identifier")
Drag and drop or click to select a file to upload: entering multiple DOI by txt-file is only possible if every DOI has its own line.
Recursion:
Clear All: clearing all inputs
Clear Selected: clearing all selected inputs
Generate Graph: generates the graph
Update Automatically: automatically updates the graph for every new input
Smart Input: checks the correctness of the entered DOI and shows a nicer depiction: Author, Journal, publication date.
German
Show Info: Durch wiederholtes klicken kann das Fenster ein und aus geblendet werden.
Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier")
Drag and drop or click to select a file to upload: Mehrere DOI in einem txt-Dokument müssen untereinander angeordnet sein.
Recursion:
Clear All: alle Eingaben werden gelöscht
Clear Selected: alle markierten Eingaben werden gelöscht
Generate Graph: generiert den zugehörigen Graphen
Update Automatically: automatische Aktualisierung des Graphen nach neuer Eingabe
Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit zudem wird nicht mehr der DOI angezeigt sondern: Der Autor, Das Journal, Das Veröffentlichungsdatum.
# Projekt CiS-Projekt 2021/22
Input-Package to fetch publication information with a given url.
## Usage/Examples
```python
from input.interface import InputInterface as Input
from input.publication import Publication
def main(url):
inter = Input()
try:
pub = inter.get_publication(url)
except Exception as error:
raise error
print(pub)
pub.title = "Cool new Title"
print(pub)
if __name__ == "__main__":
main("https://doi.org/10.1021/acs.chemrev.8b00728")
```
The expected results of calling this method are:
| Input-Url | Result |
|-----------|-----------|
| supported & correct| A publication Instance |
| supported & incorrect| ValueError|
| not supported | ValueError|
Supported URLs are urls that comply with the URL pattern of a supported journal.
### Supported Journals:
- ACS-Journals
- (Nature-Journals)
## Testing
``` c
python -m unittest input/test/<file.py> -v
# for all tests in directory
python -m unittest discover input/test -v
```
## Authors
- Florian Jochens
- Sam Ockenden
- Julius Schenk
\ No newline at end of file
File added
File added
File added
File added
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication, Citation
class Fetcher(JournalFetcher):
    """
    Specific Fetcher for the ACS journals.
    """

    # Constant for the abbreviations of the supported Journals.
    # '1021' is the DOI registrant code of ACS (doi '10.1021/...').
    SUPPORTED_JOURNALS = ['1021']

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Uses Regex to extract journal specific substrings in Doi.

        :param url: url to a Publication
        :return: True if the url belongs to a supported ACS journal
        TODO: Support non Doi-urls
        """
        # Dots in the scheme/host/prefix are escaped so that e.g.
        # 'doiXorg/...' no longer matches.
        # NOTE(review): the trailing '\w+.\S+' is kept as-is (dot matches any
        # char) because some legacy ACS DOIs lack a literal dot there.
        matched_url = re.match(
            r'^(https?://)?(doi\.org/|pubs\.acs\.org/doi/)?(10\.(\d{4})/\w+.\S+)',
            url.strip(". \t\r\n"))
        # Checks if match exists
        if matched_url is not None:
            # group 4 is the DOI registrant code (e.g. '1021' for ACS)
            return matched_url[4] in Fetcher.SUPPORTED_JOURNALS
        return False

    @staticmethod
    def _get_checked_soup(url: str):
        """Fetches the page and raises ValueError if it is not an ACS paper."""
        soup = JournalFetcher.get_soup(url)
        # Raise Error if re recognizes Pattern, but url isnt correct:
        # For other Urls
        if soup.text.strip(" \t\n") == "Missing resource null":
            raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
        # For Dois
        if soup.title is not None:
            if soup.title.text == "Error: DOI Not Found":
                raise ValueError("'{}' matches Pattern for 'ACS', but doesnt link to Paper.".format(url))
        return soup

    @staticmethod
    def _parse_article_header(soup_header):
        """
        Extracts (doi_url, title, contributors, journal, published, subjects)
        from the '.article_header' element of an ACS page.
        """
        doi_url = soup_header.select('a[title="DOI URL"]')[0].string
        title = soup_header.select(".hlFld-Title")[0].text
        contributors = [author.text
                        for author in soup_header.select(".hlFld-ContribAuthor")]
        journal = soup_header.select(".cit-title")[0].text
        # Replaces abbreviation with whole name
        if journal in JournalFetcher.abbrev_dict:
            journal = JournalFetcher.abbrev_dict[journal]
        published = soup_header.select(".pub-date-value")[0].text
        subject_soup = soup_header.select('.article_header-taxonomy')[0]
        subjects = [subject.text for subject in subject_soup.select('a')]
        return doi_url, title, contributors, journal, published, subjects

    @staticmethod
    def _parse_references(ref_cit_soup):
        """Parses the reference list ('ol#references') into Citation-instances."""
        references = []
        references_soup = ref_cit_soup.select('ol#references')
        if references_soup != []:
            for reference in references_soup[0].select('li'):
                if reference.select('.refDoi') != []:
                    ref_doi = "https://doi.org/{}".format(
                        reference.select('.refDoi')[0].text.strip()[5:])
                else:
                    # No Doi -> No Paper
                    continue
                ref_title = reference.select('.NLM_article-title')[0].text \
                    if reference.select('.NLM_article-title') != [] else None
                ref_journal = reference.select('i')[0].text \
                    if reference.select('i') != [] else None
                # Replaces abbreviation with whole name
                if ref_journal in JournalFetcher.abbrev_dict:
                    ref_journal = JournalFetcher.abbrev_dict[ref_journal]
                ref_contributors = []
                for author in reference.select('.NLM_contrib-group'):
                    ref_contributors.append(
                        author.text.replace("\n", " ").replace("\r", ""))
                references.append(Citation(ref_doi, ref_title, ref_journal,
                                           ref_contributors, cit_type="Reference"))
        return references

    @staticmethod
    def _parse_citations(ref_cit_soup):
        """Parses the 'cited by' section into Citation-instances."""
        citations = []
        citation_soup = ref_cit_soup.select('.cited-content_cbyCitation')
        if citation_soup != []:
            for citation in citation_soup[0].select('li'):
                if citation.select('a[title="DOI URL"]') != []:
                    cit_doi = citation.select('a[title="DOI URL"]')[0].text
                else:
                    # No Doi -> No Paper
                    continue
                cit_title = citation.select('.cited-content_cbyCitation_article-title')[0].text \
                    if citation.select('.cited-content_cbyCitation_article-title') != [] else None
                cit_journal = citation.select('.cited-content_cbyCitation_journal-name')[0].text \
                    if citation.select('.cited-content_cbyCitation_journal-name') != [] else None
                # Replaces abbreviation with whole name
                if cit_journal in JournalFetcher.abbrev_dict:
                    cit_journal = JournalFetcher.abbrev_dict[cit_journal]
                cit_contributors = citation.select('.cited-content_cbyCitation_article-contributors')[0] \
                    .text.replace("\n", " ").replace("\r", "").split(', ')
                # clean up of the last Entry
                cit_contributors_last = cit_contributors.pop().strip(". ")
                if cit_contributors_last != '':
                    cit_contributors.append(cit_contributors_last)
                citations.append(Citation(cit_doi, cit_title, cit_journal,
                                          cit_contributors, cit_type="Citation"))
        return citations

    @staticmethod
    def get_pub_light(url: str) -> Publication:
        """
        Fetches html and creates a BeautifulSoup-instance in the parent class.
        Specific css-searches for ACS-Journals and creates a Publication-instance
        (main article only, no references/citations).
        """
        soup = Fetcher._get_checked_soup(url)
        soup_header = soup.select('.article_header')[0]
        doi_url, title, contributors, journal, published, subjects = \
            Fetcher._parse_article_header(soup_header)
        return Publication(doi_url, title, contributors, journal, published,
                           subjects)

    # BUG FIX: @staticmethod was missing, although the parent class declares
    # this method static and it takes no 'self'.
    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Fetches html and creates a BeautifulSoup-instance in the parent class.
        Specific css-searches for ACS-Journals and creates a Publication-instance
        including its references and citations.
        """
        soup = Fetcher._get_checked_soup(url)
        soup_header = soup.select('.article_header')[0]
        # Could be used for more specific search
        ref_cit_soup = soup
        doi_url, title, contributors, journal, published, subjects = \
            Fetcher._parse_article_header(soup_header)
        references = Fetcher._parse_references(ref_cit_soup)
        citations = Fetcher._parse_citations(ref_cit_soup)
        return Publication(doi_url, title, contributors, journal, published,
                           subjects, references, citations)
#!/usr/bin/env python3
"""
Parent class for specific Journal
"""
from abc import ABCMeta, abstractmethod
from bs4 import BeautifulSoup
import requests
from input.publication import Publication
class JournalFetcher(metaclass=ABCMeta):
    """
    Abstract base class for fetcher modules.

    Subclasses implement 'can_use_url' and 'get_publication' for one
    specific journal/publisher.
    """

    @staticmethod
    def get_soup(url: str) -> BeautifulSoup:
        """
        Retrieves the website-html and returns a BeautifulSoup-instance

        Parameters:
        -----------
        :type url: str
        :param url: doi-url to a publication
        :return: BeautifulSoup-instance
        """
        # NOTE(review): requests.get only raises HTTPError via
        # raise_for_status(), so this except branch is effectively dead;
        # connection errors (RequestException) propagate uncaught, and
        # SystemExit escapes 'except Exception' handlers — TODO reconsider.
        try:
            req = requests.get(url)
        except requests.exceptions.HTTPError as err:
            raise SystemExit(err)
        return BeautifulSoup(req.content, 'html.parser')

    @staticmethod
    @abstractmethod
    def can_use_url(url: str) -> bool:
        """
        Abstract-function to be implemented in subclass.
        Checks if the given url links to a supported journal.
        """
        raise AttributeError("JournalFetcher for '{}' hasnt implemented 'can_use_url()'".format(url))

    @staticmethod
    @abstractmethod
    def get_publication(url: str) -> Publication:
        """
        Abstract-function to be implemented in subclass.
        Creates a Publication-instance.
        """
        raise AttributeError("JournalFetcher for '{}' hasnt implemented 'get_publication()'".format(url))

    # A Dictionary, which connects abbreviation to whole journal-name
    abbrev_dict = {
        # BUG FIX: 'Nat. Protoc.' is Nature Protocols
        # (not the Journal of Natural Products, which is 'J. Nat. Prod.').
        "Nat. Protoc.": "Nature Protocols",
        "PLoS Comput. Biol.": "PLoS Computational Biology",
        "PLoS One": "PLoS One",
        "Protein Sci.": "Protein Science",
        "J. Am. Chem. Soc.": "Journal of the American Chemical Society",
        "J. Chem. Phys.": "Journal of Chemical Physics",
        "Appl. Sci.": "Applied Science",
        "Comput. Sci. Eng.": "Computing in Science & Engineering",
        "Beilstein J. Org. Chem.": "Beilstein Journal of Organic Chemistry",
        "Biol. Chem.": "Biological Chemistry",
        "Isr. J. Chem.": "Israel Journal of Chemistry",
        "Nat. Methods": "Nature Methods",
        "Proc. Natl. Acad. Sci. U. S. A.": "Proceedings of the National Academy of Sciences of the United States of America",
        "J. Phys. Chem. B": "Journal of Physical Chemistry B",
        "Carbohydr. Res.": "Carbohydrate Research",
        "J. Chem. Theory Comput.": "Journal of Chemical Theory and Computation",
        "J. Mol. Biol.": "Journal of Molecular Biology",
        "Nucleic Acids Res.": "Nucleic Acids Research",
        "J. Comput. Chem.": "Journal of Computational Chemistry",
        "J. Cheminf.": "Journal of Cheminformatics",
        "J. Med. Chem.": "Journal of Medicinal Chemistry",
        "J. Comput.-Aided Mol. Des.": "Journal of Computer-Aided Molecular Design",
        "J. Chem. Inf. Model.": "Journal of Chemical Information and Modeling",
        "Mol. Cell": "Molecular Cell",
        "J. Cell Biolog.": "Journal of Cell Biology",
        "Mol. Cell Biol.": "Molecular and Cellular Biology",
        "J. Cell Sci.": "Journal of Cell Science",
        "Nat. Cell Biol.": "Nature Cell Biology",
        "J. Aerosol Sci. Technol.": "Aerosol Science and Technology",
        "Mol. Biol. Cell": "Molecular Biology of the Cell",
        "Build. Environ.": "Building and Environment",
        "Sci. Rep.": "Scientific Reports",
        "Nat. Chem.": "Nature Chemistry",
        "Nat. Med.": "Nature Medicine",
        "Nat. Commun.": "Nature Communications",
        "Exp. Cell Res.": "Experimental Cell Research",
        "Nat. Chem. Biol.": "Nature Chemical Biology",
    }
\ No newline at end of file
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: Check if Url can be used with 'can_use_url'
and then fetch publication with 'get_publication'
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
    """
    Scrapes publication metadata from a provided url (Nature journals).
    """

    # TODO: list of compatible journals.
    # NOTE: nature does not use journal names in doi links,
    #       must match by 10.xxxx identifier instead.
    SUPPORTED_JOURNALS = []

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Checks if the given url links to a supported journal.
        """
        # TODO: check the URL for compatibility (re.match in SUPPORTED_JOURNALS).
        return False

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Creates a Publication-instance from the page's <head> metadata.
        """
        head = JournalFetcher.get_soup(url).head
        doi_url = "https://doi.org/" + head.find(attrs={"name": "DOI"}).get("content")
        title = head.find(attrs={"name": "citation_title"}).get("content")
        journal = head.find(attrs={"name": "citation_journal_title"}).get("content")
        published = head.find(attrs={"name": "prism.publicationDate"}).get("content")
        contributors = [tag.get("content")
                        for tag in head.findAll(attrs={"name": "dc.creator"})]
        subjects = [tag.get("content")
                    for tag in head.findAll(attrs={"name": "dc.subject"})]
        return Publication(doi_url, title, contributors, journal, published, subjects)
        # TODO: exception handling
        # raise ValueException("Cant Fetch: '{}'".format(error))
        # return None
#!/usr/bin/env python3
"""
Child class of JournalFetcher
Usage: None, this is just a template and should be ignored
"""
# import re
from input.get.journal_fetcher import JournalFetcher
from input.publication import Publication
class Fetcher(JournalFetcher):
    """
    Template fetcher; has no functionality and should be ignored.
    """

    # TODO naming convention:
    #   class: 'Fetcher'
    #   file:  [journal-/organisation-name], format "[a-z]*.py" allowed
    # TODO: list of compatible journals
    SUPPORTED_JOURNALS = []

    @staticmethod
    def can_use_url(url: str) -> bool:
        """
        Checks if given url links to a supported journal.
        """
        # TODO: check the URL for compatibility, e.g.:
        # url_re = re.match(r'(https?://)?(doi.org/)?(10.(\d{4})/\w+.\S+)', url)
        # if url_re is not None:
        #     return url_re[4] in SUPPORTED_JOURNALS
        return False

    @staticmethod
    def get_publication(url: str) -> Publication:
        """
        Creates a Publication-instance.
        """
        # TODO: fetch data from the HTML, e.g.:
        # soup = JournalFetcher.get_soup(url)
        # doi,title,contributors[],journal,publication_date,subjects[],references[],citations[]
        # TODO: create and return a new Publication-instance:
        # return Publication(doi_url, title, contributors = [], journal
        #                    , publication_date, subjects = [], references = [], citations = [])
        return None
\ No newline at end of file
#!/usr/bin/env python3
"""
Interface for the Input-Package only this should be accessed from outside this Package.
"""
from os import walk
import importlib
import pathlib
import re
from input.publication import Publication
class InputInterface:
    """
    Singleton which dynamically imports and manages fetchers.
    """
    # Shared singleton instance (None until first construction).
    instance = None
    # Absolute path of the 'get' package; set by import_fetcher_classes.
    get_path = None
    # 'Fetcher'-classes discovered in the 'get' package (class attribute,
    # shared by all instances).
    fetcher_classes = []

    # '__new__' is called before '__init__' and gives us an instance
    def __new__(cls, *args, **kwargs):
        # checks if an instance exists and if it doesnt creates one
        if cls.instance is None:
            # object.__new__ takes no extra arguments; forwarding *args was
            # unnecessary and only worked by accident.
            cls.instance = super(InputInterface, cls).__new__(cls)
        return cls.instance

    def __init__(self):
        """Imports all fetcher modules on first construction."""
        if self.fetcher_classes == []:
            self.import_fetcher_classes()
        if self.fetcher_classes == []:
            raise AttributeError("No specific Fetchers where found at: '{}'"
                                 .format(self.get_path))

    def get_publication(self, url: str) -> Publication:
        """
        The interface-method to get a Publication-instance
        (including it's citations and references)

        Parameters
        ----------
        :param url: url to a Publication
        :type url: str
        :return: Publication instance
        :raises ValueError: if no fetcher supports the url
        """
        # Checks if a module supports the 'url' and
        # returns a Publication if it does.
        for fetcher_class in InputInterface.fetcher_classes:
            if fetcher_class.can_use_url(url):
                return fetcher_class.get_publication(url)
        # No Module for given url was found
        raise ValueError("'{}' is not supported".format(url))

    def get_pub_light(self, url: str) -> Publication:
        """
        The interface-method to get a Publication-instance
        (only for the main article)

        Parameters
        ----------
        :param url: url to a Publication
        :type url: str
        :return: Publication instance
        :raises ValueError: if no fetcher supports the url
        """
        # Checks if a module supports the 'url' and
        # returns a Publication if it does.
        for fetcher_class in InputInterface.fetcher_classes:
            if fetcher_class.can_use_url(url):
                return fetcher_class.get_pub_light(url)
        # No Module for given url was found
        raise ValueError("'{}' is not supported".format(url))

    def get_supported_fetchers(self):
        """Returns the class names of all imported fetchers."""
        # Useless right now, because all classes are called 'Fetcher'.
        return [a.__name__ for a in self.fetcher_classes]

    def import_fetcher_classes(self):
        """
        Searches in 'get', if there are [a-z]*.py modules (specific Fetchers)
        and tries to import them.
        Saves the found 'Fetcher'-classes in 'fetcher_classes'.

        :raises ImportError: if a matching module cannot be imported or has
            no 'Fetcher'-class
        """
        # Path to 'get'-package
        self.get_path = '{}/get'.format(pathlib.Path(__file__).parent.resolve())
        # Searches for modules with the given pattern.
        # BUG FIX: the pattern is now anchored and the dot escaped —
        # r'[a-z]+.py' also matched names like 'acs.pyc' or 'abcpyx'.
        fetcher_file_names = []
        for file in next(walk(self.get_path), (None, None, []))[2]:
            if re.match(r'[a-z]+\.py$', file) is not None:
                fetcher_file_names.append(file)
        # Tries to import those modules and saves their 'Fetcher'-class
        for file in fetcher_file_names:
            try:
                fetcher_module = importlib.import_module("input.get.{}".format(file[:-3]))
            except Exception:
                raise ImportError("Module '{}' can not be imported".format(file[:-3]))
            try:
                self.fetcher_classes.append(fetcher_module.__getattribute__('Fetcher'))
            except Exception:
                # BUG FIX: the ImportError was previously constructed but
                # never raised, silently skipping broken modules.
                raise ImportError("Module '{}' does not have a 'Fetcher'-class".format(file[:-3]))
#!/usr/bin/env python3
# this is needed for typing pre python 3.9, this maybe as an large Overhead
from typing import Any, List
class Publication:
    """
    Represents a publication together with its linked references and citations.
    """

    def __init__(self, doi_url: str, title: str,
                 contributors: List[str], journal: str,
                 publication_date: str, subjects: List[str],
                 references: List[Any] = None, citations: List[Any] = None):
        """
        Parameters
        ----------
        :param doi_url: doi_url of the publication
        :type doi_url: str
        :param title: title of the publication
        :type title: str
        :param contributors: list of all contributors
        :type contributors: List[str]
        :param journal: journal the publication appeared in
        :type journal: str
        :param publication_date: date of release
        :type publication_date: str
        :param subjects: the subjects of the Publication
        :type subjects: List[str]
        :param references: the Citations which are referenced by this Publication
        :type references: List[Any]
        :param citations: the Citations which reference this Publication
        :type citations: List[Any]
        :return: None
        """
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        self.subjects = subjects
        # Mutable defaults are created per instance (never in the signature).
        self.references = [] if references is None else references
        self.citations = [] if citations is None else citations
        # For the 'Verarbeitungsgruppe' (processing group)
        self.group = None

    def __str__(self) -> str:
        return ("Title: {}\n"
                "Doi-url: {}\n"
                "Authors: {}\n"
                "Journal: {}\n"
                "Published on: {}\n"
                "Subjects: {}\n"
                "References: \n{}\n"
                "Citations: \n{}")\
            .format(self.title, self.doi_url, ", ".join(self.contributors)
                    , self.journal, self.publication_date
                    , ", ".join(self.subjects)
                    , "\n".join(self.get_citation_string(self.references))
                    , "\n".join(self.get_citation_string(self.citations)))

    @staticmethod
    def get_citation_string(citations):
        """Returns printable strings for the citations, or ["None"] if empty."""
        if citations == []:
            return ["None"]
        return [str(citation) for citation in citations]

    def add_citations(self, citation) -> List[Any]:
        """
        Appends a Citation or a list of Citations to self.citations.

        Parameter
        ---------
        :param citation: Citation or Reference of the Publication
        :type citation: Citation or list[Citation]
        :raises TypeError: if given anything but Citation(s)
        :return: self.citations
        """
        # isinstance (rather than exact type comparison) also accepts
        # subclasses of Citation.
        if isinstance(citation, Citation):
            self.citations.append(citation)
        elif isinstance(citation, list):
            for cit in citation:
                if not isinstance(cit, Citation):
                    # BUG FIX: the message referred to a nonexistent
                    # '_set_citation' method.
                    raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
                                    .format(type(cit)))
                self.citations.append(cit)
        else:
            raise TypeError("add_citations expects Citations or List of Citations, not: '{}'"
                            .format(type(citation)))
        return self.citations

    def __eq__(self, other) -> bool:
        """Compares the unique doi_url of two Publications."""
        if type(self) == type(other):
            return self.doi_url == other.doi_url
        return False
class Citation:
    """Lightweight record for one reference or citation of a Publication."""

    def __init__(self, doi_url: str, title: str,
                 journal: str, contributors: List[str],
                 cit_type: str = "Citation"):
        """
        Parameters
        ----------
        :param doi_url: doi_url of the publication
        :type doi_url: str
        :param title: title of the publication
        :type title: str
        :param journal: journal the publication appeared in
        :type journal: str
        :param contributors: list of all contributors
        :type contributors: List[str]
        :param cit_type: specifies if this is a Reference or a Citation
        :type cit_type: str
        :return: None
        """
        self.title = title
        self.doi_url = doi_url
        self.journal = journal
        self.contributors = contributors
        self.cit_type = cit_type

    def __str__(self) -> str:
        # Each line is prefixed with the citation type (e.g. "Citation-Title").
        kind = self.cit_type
        return (
            "\t{}-Title: {}\n".format(kind, self.title)
            + "\t{}-Doi: {}\n".format(kind, self.doi_url)
            + "\t{}-Journal: {}\n".format(kind, self.journal)
            + "\t{}-Contributors: {}\n".format(kind, ", ".join(self.contributors))
        )
beautifulsoup4
requests
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment