diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..61f2dc9f84d472c32fa57194620d6b1e5fa14649
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+**/__pycache__/
diff --git a/count_journal.py b/count_journal.py
new file mode 100755
index 0000000000000000000000000000000000000000..13886a2e7badf339bdd23475f7d3de713329f472
--- /dev/null
+++ b/count_journal.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""Count how often each journal appears among the citations and
+references of a fixed set of publications."""
+
+from input.interface import InputInterface as Input
+
+# global tally: journal name -> number of occurrences
+cit = {}
+
+def count_journals(url: str):
+    """Fetch the publication behind *url* and add the journals of all
+    its citations and references to the global ``cit`` tally."""
+    inter = Input()
+    pub = inter.get_publication(url)
+
+    if pub.citations:
+        for citation in pub.citations:
+            cit[citation.journal] = cit.get(citation.journal, 0) + 1
+
+    if pub.references:
+        for reference in pub.references:
+            cit[reference.journal] = cit.get(reference.journal, 0) + 1
+
+if __name__ == "__main__":
+    count_journals("https://doi.org/10.1021/acs.jcim.1c00203")
+    count_journals("https://doi.org/10.1021/acs.jcim.6b00561")
+    count_journals("https://doi.org/10.1021/acs.jcim.6b00613")
+    count_journals("https://doi.org/10.1021/acs.jcim.1c00917")
+    count_journals("https://doi.org/10.1021/acs.jmedchem.0c01332")
+    #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.1c00290")
+    #count_journals("https://pubs.acs.org/doi/10.1021/acsenvironau.1c00007")
+    #count_journals("https://pubs.acs.org/doi/10.1021/acs.biochem.7b01162")
+
+    # print journals sorted by ascending count, skipping empty names
+    cit = dict(sorted(cit.items(), key=lambda item: item[1]))
+    for journal in cit:
+        if journal != "":
+            print(f'{journal}: {cit[journal]}')
diff --git a/example_input.py b/example_input.py
new file mode 100755
index 0000000000000000000000000000000000000000..c9bca4189fce4c1fd0a0dfc42ef4e517baa5f406
--- /dev/null
+++ b/example_input.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""Small demo driver for the input package."""
+
+from input.interface import InputInterface as Input
+
+def main(url: str):
+    """Fetch and print the lightweight publication record for *url*."""
+    i = Input()
+    #print(i.get_publication(url))
+    print(i.get_pub_light(url))
+    # print(i.get_supported_fetchers()) Useless because all classes are called the same
+
+if __name__ == "__main__":
+    #main("https://doi.org/10.1021/acs.jcim.1c0023")
+    main("https://doi.org/10.1021/acs.jcim.5b00332")
diff --git a/input_old/README.md b/input_old/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..76bd11d5d70daac13e190f4d52269eb381413c69
--- /dev/null
+++ b/input_old/README.md
@@ -0,0 +1,3 @@
+# Projekt CiS-Projekt 2021/22
+Input-Skripts
+
diff --git a/input_old/__pycache__/input_fj.cpython-39.pyc b/input_old/__pycache__/input_fj.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a3e6099f4ab4c56400b2698c812d4b5fc9a9a7aa
Binary files /dev/null and b/input_old/__pycache__/input_fj.cpython-39.pyc differ
diff --git a/input_old/example_urls b/input_old/example_urls
new file mode 100644
index 0000000000000000000000000000000000000000..96ac680c65edddcb495312000157edea1ab94884
--- /dev/null
+++ b/input_old/example_urls
@@ -0,0 +1,2 @@
+https://pubs.acs.org/doi/10.1021/acs.jcim.5b00332
+https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709
diff --git a/input_old/input_fj.py b/input_old/input_fj.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecc8e68fc5a84a446ae3f09dcb5ed56e8d262766
--- /dev/null
+++ b/input_old/input_fj.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""
+Functions for information retrieval of articles from the ACS journal JCIM
+
+"""
+
+__author__ = "Florian Jochens"
+__email__ = "fj@andaco.de"
+__status__ = "Production"
+#__copyright__ = ""
+#__credits__ = ["", "", "", ""]
+#__license__ = ""
+#__version__ = ""
+#__maintainer__ = ""
+
+from bs4 import BeautifulSoup as bs
+import requests as req
+import sys  
+from pathlib import Path
+
+class Publication:
+    # NOTE(review): the old class-level "_citations"/"_references" lists
+    # were shared by every instance; create per-instance lists instead.
+
+    def __init__(self, title, publication_date, contributors, doi_url,
+                 subjects = None, num_citations = None):
+        """Bundle the core metadata of one article."""
+        self.title = title
+        self.publication_date = publication_date
+        self.contributors = contributors
+        self.doi_url = doi_url
+        self.subjects = subjects
+        self.num_citations = num_citations
+        self._citations = []
+        self._references = []
+
+
+class Citation:
+    def __init__(self, title, journal, contributors, doi_url):
+        self.title = title
+        self.journal = journal
+        self.contributors = contributors
+        self.doi_url = doi_url
+
+class References:
+    def __init__(self, title, journal, contributors, doi_url):
+        self.title = title
+        self.journal = journal
+        self.contributors = contributors
+        self.doi_url = doi_url
+    
+def get_article_info(soup):
+    header = soup.find('div', class_ = 'article_header-left pull-left')
+    article_title = header.find('span', class_ = 'hlFld-Title').text
+    publication_date = header.find('span', class_ = 'pub-date-value').text
+    for link in header.find('div', class_ = 'article_header-doiurl'):
+        doi_url = link.get('href')
+    subs = header.find('div', class_ = 'article_header-taxonomy')
+    subjects = []
+    for sub in subs.find_all('a'):
+        subjects.append(sub.get('title'))
+    cons = header.find('ul', class_ = 'loa')
+    contributors = []
+    for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'):
+        contributors.append(con.text)
+    numc = header.find('div', class_ = 'articleMetrics_count')
+    if not numc.a:
+        num_citations = 0
+    else:
+        num_citations = numc.a.text
+
+    pub = Publication(article_title, publication_date, contributors, doi_url,
+                      subjects, num_citations)
+    return pub
+
+def get_download_url(soup):
+    """Return the ACS citation-export URL found in *soup*.
+    ('soup' used to be an undefined global here -- NameError at call.)"""
+    export = soup.find('div', class_ = 'cit-download-dropdown_content')
+    url = 'https://pubs.acs.org'
+    for link in export.find_all('a'):
+        if link.get('title') == 'Citation and references':
+            url += link.get('href')
+    return url
+
+def download(url): # check whether the citation/references file exists
+    # old check used str.find('='), whose -1 ("not found") is truthy and
+    # then rsplit(...)[1] raised IndexError; [-1] is safe for both cases
+    filename = url.rsplit('=', 1)[-1]
+    path = Path('./files/' + filename)
+    if path.is_file():
+        print("File already exists")
+    else:
+        print("File does not exist")
+
+def get_citation_info(pub, num_citations, soup):
+    pub._citations = []
+    details = soup.find('ol', class_ = 'cited-content_cbyCitation')
+    titles = [] 
+    for title in details.find_all('span', 
+            class_ = 'cited-content_cbyCitation_article-title'):
+        titles.append(title.text.replace('.', ''))
+    journal_names = []
+    for name in details.find_all('span',
+            class_ = 'cited-content_cbyCitation_journal-name'):
+        journal_names.append(name.text)
+    doi_urls = []
+    for url in details.find_all('a'):
+        doi_urls.append(url.get('href'))
+    contributors = []
+    for contrib in details.find_all('span', 
+            class_ = 'cited-content_cbyCitation_article-contributors'):
+        contributors.append(contrib.text)
+    for i in range(0, int(num_citations)):
+        pub._citations.append(Citation(titles[i], journal_names[i], 
+                              contributors[i], doi_urls[i]))
+def print_pub_info(pub):
+    print(f'''Article title:    {pub.title}
+Publication date: {pub.publication_date}
+DOI-URL:          {pub.doi_url}
+
+Subjects:''')
+    print(*(pub.subjects), sep = ", ")
+    print('\nContributors:')
+    print(*(pub.contributors), sep = ", ")
+
+    if int(pub.num_citations) > 0:
+        if int(pub.num_citations) == 1:
+            print(f'\nThis publication is cited by the following publication:\n')
+        else:
+            print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n')
+        for citation in pub._citations:
+            print(f'''
+    Title:        {citation.title}
+    Journal:      {citation.journal}
+    Contributors: {citation.contributors}
+    DOI-URL:      {citation.doi_url}
+            ''')
+    else:
+        print('\nThis publication is not cited by any other publication.')
+
+def input(url):
+    html_text = req.get(url).text
+    soup = bs(html_text, 'html.parser')
+    
+    pub = get_article_info(soup)
+    if int(pub.num_citations) > 0:
+        get_citation_info(pub, int(pub.num_citations), soup)
+    return pub
+
+#if len(sys.argv) != 2:
+#    sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
+#    exit(1)
+#url = sys.argv[1]
+#pub = input(url)
+#print_pub_info(pub)
diff --git a/input_old/pub.py b/input_old/pub.py
new file mode 100644
index 0000000000000000000000000000000000000000..13b90e804cd485813b731385b319b3077a017dd2
--- /dev/null
+++ b/input_old/pub.py
@@ -0,0 +1,32 @@
+class Publication:
+    """Metadata of one publication plus its citation/reference lists."""
+
+    def __init__(self, title, publication_date, contributors, doi_url,
+                 subjects, num_citations, num_references = None):
+        # 'num_references' used to be an undefined name here (NameError) and
+        # the two list assignments below used tabs (TabError); both fixed.
+        self.title = title
+        self.publication_date = publication_date
+        self.contributors = contributors
+        self.doi_url = doi_url
+        self.subjects = subjects
+        self.num_citations = num_citations
+        self.num_references = num_references
+        self._citations = []
+        self._references = []
+
+class Citation:
+    """A publication that cites the main publication."""
+    def __init__(self, title, journal, contributors, doi_url):
+        self.title = title
+        self.journal = journal
+        self.contributors = contributors
+        self.doi_url = doi_url
+
+class References:
+    """A publication that the main publication references."""
+    def __init__(self, title, journal, contributors, doi_url):
+        self.title = title
+        self.journal = journal
+        self.contributors = contributors
+        self.doi_url = doi_url
diff --git a/input_old/test.py b/input_old/test.py
new file mode 100755
index 0000000000000000000000000000000000000000..dc623ca182691e9e06a6713a4d3d5dcf0bbf23c2
--- /dev/null
+++ b/input_old/test.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+
+from input_fj import input, print_pub_info
+import sys
+
+if len(sys.argv) != 3:
+    sys.stderr.write('Usage: {} <url> <url>\n'.format(sys.argv[0]))
+    exit(1)
+url = sys.argv[1]
+url2 = sys.argv[2]
+pub = input(url)
+print_pub_info(pub)
+pub2 = input(url2)
+print_pub_info(pub2)
+
diff --git a/input_old/x b/input_old/x
new file mode 100644
index 0000000000000000000000000000000000000000..c8ade9d56a520a3ac57e5eadce8b81bb3e63c0dd
--- /dev/null
+++ b/input_old/x
@@ -0,0 +1,234 @@
+Article title:    Feasibility of Active Machine Learning for Multiclass Compound Classification
+Publication date: January 7, 2016
+DOI-URL:          https://doi.org/10.1021/acs.jcim.5b00332
+
+Subjects:
+Algorithms, Molecules, Drug discovery, Screening assays, Receptors
+
+Contributors:
+Tobias Lang, Florian Flachsenberg, Ulrike von Luxburg, Matthias Rarey
+
+This publication is cited by the following 30 publications:
+
+
+    Title:        Concepts of Artificial Intelligence for Computer-Assisted Drug Discovery 
+    Journal:      Chemical Reviews
+    Contributors: Xin Yang, Yifei Wang, Ryan Byrne, Gisbert Schneider, Shengyong Yang. 
+    DOI-URL:      https://doi.org/10.1021/acs.chemrev.8b00728
+            
+
+    Title:        De Novo Molecule Design by Translating from Reduced Graphs to SMILES 
+    Journal:      Journal of Chemical Information and Modeling
+    Contributors: Peter Pogány, Navot Arad, Sam Genway, Stephen D. Pickett. 
+    DOI-URL:      https://doi.org/10.1021/acs.jcim.8b00626
+            
+
+    Title:        Designing Algorithms To Aid Discovery by Chemical Robots 
+    Journal:      ACS Central Science
+    Contributors: Alon B. Henson, Piotr S. Gromski, Leroy Cronin. 
+    DOI-URL:      https://doi.org/10.1021/acscentsci.8b00176
+            
+
+    Title:        Modeling Kinase Inhibition Using Highly Confident Data Sets 
+    Journal:      Journal of Chemical Information and Modeling
+    Contributors: Sorin Avram, Alina Bora, Liliana Halip, Ramona Curpăn. 
+    DOI-URL:      https://doi.org/10.1021/acs.jcim.7b00729
+            
+
+    Title:        Predictive Models for Fast and Effective Profiling of Kinase Inhibitors 
+    Journal:      Journal of Chemical Information and Modeling
+    Contributors: Alina  Bora, Sorin  Avram, Ionel  Ciucanu, Marius  Raica, and Stefana  Avram  . 
+    DOI-URL:      https://doi.org/10.1021/acs.jcim.5b00646
+            
+
+    Title:        Evaluation of categorical matrix completion algorithms: toward improved active learning for drug discovery 
+    Journal:      Bioinformatics
+    Contributors: Huangqingbo  Sun, Robert F  Murphy, . 
+    DOI-URL:      https://doi.org/10.1093/bioinformatics/btab322
+            
+
+    Title:        An Artificial Intelligence Approach Based on Hybrid CNN-XGB Model to Achieve High Prediction Accuracy through Feature Extraction, Classification and Regression for Enhancing Drug Discovery in Biomedicine 
+    Journal:      International Journal of Biology and Biomedical Engineering
+    Contributors: Mukesh  Madanan, Biju T.  Sayed, Nurul Akhmal  Mohd Zulkefli, Nitha C.  Velayudhan. 
+    DOI-URL:      https://doi.org/10.46300/91011.2021.15.22
+            
+
+    Title:        Artificial Intelligence in Medicinal Chemistry 
+    Journal:      
+    Contributors: Edward  Griffen, Alexander  Dossetter, Andrew  Leach, Shane  Montague. 
+    DOI-URL:      https://doi.org/10.1002/0471266949.bmc267
+            
+
+    Title:        Practical Chemogenomic Modeling and Molecule Discovery Strategies Unveiled by Active Learning 
+    Journal:      
+    Contributors: J.B.  Brown. 
+    DOI-URL:      https://doi.org/10.1016/B978-0-12-801238-3.11533-8
+            
+
+    Title:        Machine learning phases and criticalities without using real data for training 
+    Journal:      Physical Review B
+    Contributors: D.-R.  Tan, F.-J.  Jiang. 
+    DOI-URL:      https://doi.org/10.1103/PhysRevB.102.224434
+            
+
+    Title:        Active learning effectively identifies a minimal set of maximally informative and asymptotically performant cytotoxic structure–activity patterns in NCI-60 cell lines 
+    Journal:      RSC Medicinal Chemistry
+    Contributors: Takumi  Nakano, Shunichi  Takeda, J.B.  Brown. 
+    DOI-URL:      https://doi.org/10.1039/D0MD00110D
+            
+
+    Title:        Active learning efficiently converges on rational limits of toxicity prediction and identifies patterns for molecule design 
+    Journal:      Computational Toxicology
+    Contributors: Ahsan  Habib Polash, Takumi  Nakano, Christin  Rakers, Shunichi  Takeda, J.B.  Brown. 
+    DOI-URL:      https://doi.org/10.1016/j.comtox.2020.100129
+            
+
+    Title:        Practical considerations for active machine learning in drug discovery 
+    Journal:      Drug Discovery Today: Technologies
+    Contributors: Daniel  Reker. 
+    DOI-URL:      https://doi.org/10.1016/j.ddtec.2020.06.001
+            
+
+    Title:        Designing compact training sets for data-driven molecular property prediction through optimal exploitation and exploration 
+    Journal:      Molecular Systems Design & Engineering
+    Contributors: Bowen  Li, Srinivas  Rangarajan. 
+    DOI-URL:      https://doi.org/10.1039/C9ME00078J
+            
+
+    Title:        Applicability Domain of Active Learning in Chemical Probe Identification: Convergence in Learning from Non-Specific Compounds and Decision Rule Clarification 
+    Journal:      Molecules
+    Contributors: Ahsan Habib  Polash, Takumi  Nakano, Shunichi  Takeda, J.B.  Brown. 
+    DOI-URL:      https://doi.org/10.3390/molecules24152716
+            
+
+    Title:        Capturing and applying knowledge to guide compound optimisation 
+    Journal:      Drug Discovery Today
+    Contributors: Matthew  Segall, Tamsin  Mansley, Peter  Hunt, Edmund  Champness. 
+    DOI-URL:      https://doi.org/10.1016/j.drudis.2019.02.004
+            
+
+    Title:        A novel graph kernel on chemical compound classification 
+    Journal:      Journal of Bioinformatics and Computational Biology
+    Contributors: Qiangrong  Jiang, Jiajia  Ma. 
+    DOI-URL:      https://doi.org/10.1142/S0219720018500269
+            
+
+    Title:        Accelerating Drug Discovery Using Convolution Neural Network Based Active Learning 
+    Journal:      
+    Contributors: Pengfei  Liu, Kwong-Sak  Leung. 
+    DOI-URL:      https://doi.org/10.1109/TENCON.2018.8650298
+            
+
+    Title:        An Adaptive Lightweight Security Framework Suited for IoT 
+    Journal:      
+    Contributors: Menachem  Domb. 
+    DOI-URL:      https://doi.org/10.5772/intechopen.73712
+            
+
+    Title:        Adaptive mining and model building of medicinal chemistry data with a multi-metric perspective 
+    Journal:      Future Medicinal Chemistry
+    Contributors: JB  Brown. 
+    DOI-URL:      https://doi.org/10.4155/fmc-2018-0188
+            
+
+    Title:        Chemogenomic Active Learning's Domain of Applicability on Small, Sparse qHTS Matrices: A Study Using Cytochrome P450 and Nuclear Hormone Receptor Families 
+    Journal:      ChemMedChem
+    Contributors: Christin  Rakers, Rifat Ara  Najnin, Ahsan Habib  Polash, Shunichi  Takeda, J.B.  Brown. 
+    DOI-URL:      https://doi.org/10.1002/cmdc.201700677
+            
+
+    Title:        Automating drug discovery 
+    Journal:      Nature Reviews Drug Discovery
+    Contributors: Gisbert  Schneider. 
+    DOI-URL:      https://doi.org/10.1038/nrd.2017.232
+            
+
+    Title:        Classifiers and their Metrics Quantified 
+    Journal:      Molecular Informatics
+    Contributors: J. B.  Brown. 
+    DOI-URL:      https://doi.org/10.1002/minf.201700127
+            
+
+    Title:        Active Search for Computer-aided Drug Design 
+    Journal:      Molecular Informatics
+    Contributors: Dino  Oglic, Steven A.  Oatley, Simon J. F.  Macdonald, Thomas  Mcinally, Roman  Garnett, Jonathan D.  Hirst, Thomas  Gärtner. 
+    DOI-URL:      https://doi.org/10.1002/minf.201700130
+            
+
+    Title:        Selection of Informative Examples in Chemogenomic Datasets 
+    Journal:      
+    Contributors: Daniel  Reker, J. B.  Brown. 
+    DOI-URL:      https://doi.org/10.1007/978-1-4939-8639-2_13
+            
+
+    Title:        The value of prior knowledge in machine learning of complex network systems 
+    Journal:      Bioinformatics
+    Contributors: Dana  Ferranti, David  Krane, David  Craft, . 
+    DOI-URL:      https://doi.org/10.1093/bioinformatics/btx438
+            
+
+    Title:        Lightweight adaptive Random-Forest for IoT rule generation and execution 
+    Journal:      Journal of Information Security and Applications
+    Contributors: Menachem  Domb, Elisheva  Bonchek-Dokow, Guy  Leshem. 
+    DOI-URL:      https://doi.org/10.1016/j.jisa.2017.03.001
+            
+
+    Title:        Active learning for computational chemogenomics 
+    Journal:      Future Medicinal Chemistry
+    Contributors: Daniel  Reker, Petra  Schneider, Gisbert  Schneider, JB  Brown. 
+    DOI-URL:      https://doi.org/10.4155/fmc-2016-0197
+            
+
+    Title:        Small Random Forest Models for Effective Chemogenomic Active Learning 
+    Journal:      Journal of Computer Aided Chemistry
+    Contributors: Christin  Rakers, Daniel  Reker, J.B.  Brown. 
+    DOI-URL:      https://doi.org/10.2751/jcac.18.124
+            
+
+    Title:        Large-Scale Off-Target Identification Using Fast and Accurate Dual Regularized One-Class Collaborative Filtering and Its Application to Drug Repurposing 
+    Journal:      PLOS Computational Biology
+    Contributors: Hansaim  Lim, Aleksandar  Poleksic, Yuan  Yao, Hanghang  Tong, Di  He, Luke  Zhuang, Patrick  Meng, Lei  Xie, . 
+    DOI-URL:      https://doi.org/10.1371/journal.pcbi.1005135
+            
+Article title:    Matched Molecular Series: Measuring SAR Similarity
+Publication date: May 1, 2017
+DOI-URL:          https://doi.org/10.1021/acs.jcim.6b00709
+
+Subjects:
+Substituents, Mathematical methods, Structure activity relationship, Biological databases
+
+Contributors:
+Emanuel S. R. Ehmki, Christian Kramer
+
+This publication is cited by the following 5 publications:
+
+
+    Title:        Matched Molecular Series Analysis for ADME Property Prediction 
+    Journal:      Journal of Chemical Information and Modeling
+    Contributors: Mahendra Awale, Sereina Riniker, Christian Kramer. 
+    DOI-URL:      https://doi.org/10.1021/acs.jcim.0c00269
+            
+
+    Title:        Approaches using AI in medicinal chemistry 
+    Journal:      
+    Contributors: Christian  Tyrchan, Eva  Nittinger, Dea  Gogishvili, Atanas  Patronov, Thierry  Kogej. 
+    DOI-URL:      https://doi.org/10.1016/B978-0-12-822249-2.00002-5
+            
+
+    Title:        Bioactivity Prediction Based on Matched Molecular Pair and Matched Molecular Series Methods 
+    Journal:      Current Pharmaceutical Design
+    Contributors: Xiaoyu  Ding, Chen  Cui, Dingyan  Wang, Jihui  Zhao, Mingyue  Zheng, Xiaomin  Luo, Hualiang  Jiang, Kaixian  Chen. 
+    DOI-URL:      https://doi.org/10.2174/1381612826666200427111309
+            
+
+    Title:        BRADSHAW: a system for automated molecular design 
+    Journal:      Journal of Computer-Aided Molecular Design
+    Contributors: Darren V. S.  Green, Stephen  Pickett, Chris  Luscombe, Stefan  Senger, David  Marcus, Jamel  Meslamani, David  Brett, Adam  Powell, Jonathan  Masson. 
+    DOI-URL:      https://doi.org/10.1007/s10822-019-00234-8
+            
+
+    Title:        The use of matched molecular series networks for cross target structure activity relationship translation and potency prediction 
+    Journal:      MedChemComm
+    Contributors: Christopher E.  Keefer, George  Chang. 
+    DOI-URL:      https://doi.org/10.1039/C7MD00465F
+            
diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..0dcc7391bd5a633a86841f6097f486017ae94dfa
--- /dev/null
+++ b/verarbeitung/Processing.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+"""
+Functions to generate a graph representing citations between multiple ACS/Nature journals
+
+"""
+
+__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
+__email__ = "cis-project2021@zbh.uni-hamburg.de"
+__status__ = "Production"
+#__copyright__ = ""
+#__credits__ = ["", "", "", ""]
+#__license__ = ""
+#__version__ = ""
+#__maintainer__ = ""
+
+from bs4 import BeautifulSoup as bs
+import requests as req
+import sys  
+from pathlib import Path
+from input_fj import input
+from input_test import input_test_func
+from json_demo import output_to_json
+
+# adds every publication from input list to graph structure
+# doi_input_list: list of publication dois from user
+def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
+    references_pub_obj_list = []
+    citations_pub_obj_list = []
+
+    for pub_doi in doi_input_list:
+
+        #checks if its a test and chooses input function accordingly
+        if(test_var):
+            pub = input_test_func(pub_doi)
+        else:
+            pub = input(pub_doi)
+
+        # checks if publication already exists in nodes
+        not_in_nodes = True
+        for node in nodes:                                              # checks if a pub is already in nodes
+            if (pub.doi_url == node.doi_url):
+                not_in_nodes = False
+                break
+        if (not_in_nodes):
+            nodes.append(pub)
+            pub.group = "input"
+        else:
+            doi_input_list.remove(pub_doi)
+
+        # inserts references as publication objects into list and 
+        # inserts first depth references into nodes/edges if maximum search depth > 0
+        for reference in create_graph_structure_references(pub, 0, search_depth_max, test_var):
+            references_pub_obj_list.append(reference)
+
+        # inserts citations as publication objects into list and 
+        # inserts first height citations into nodes if maximum search height > 0
+        for citation in create_graph_structure_citations(pub, 0, search_height_max, test_var):
+            citations_pub_obj_list.append(citation)
+
+    return(references_pub_obj_list, citations_pub_obj_list)
+        
+    
+# adds edges between citation and reference group
+def complete_inner_edges(test_var):
+    for node in nodes:
+        if (node.group == "depth"):
+            for citation in node.citations:
+                for cit in nodes:
+                    if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges):
+                        edges.append([citation.doi_url, node.doi_url])
+        if (node.group == "height"):
+            for reference in node.references:
+                for ref in nodes:
+                    if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges):
+                        edges.append([node.doi_url,reference.doi_url])
+
+
+
+# adds a node for every publication unknown
+# adds edges for references between publications     
+def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
+    references_pub_obj_list = []
+    for reference in pub.references:
+        not_in_nodes = True
+        for node in nodes:
+            # checks every reference for duplication 
+            if (reference.doi_url == node.doi_url):
+                not_in_nodes = False
+                break
+        if (not_in_nodes):
+            if (search_depth < search_depth_max):
+
+                #checks if its a test and chooses input function accordingly
+                if (test_var):
+                    reference_pub_obj = input_test_func(reference.doi_url)
+                else:
+                    reference_pub_obj = input(reference.doi_url)
+
+                reference_pub_obj.group = "depth"
+                nodes.append(reference_pub_obj)
+                edges.append([pub.doi_url,reference_pub_obj.doi_url])
+                references_pub_obj_list.append(reference_pub_obj)
+
+        # adds edge only if citation already exists           
+        elif [pub.doi_url,reference.doi_url] not in edges:
+            edges.append([pub.doi_url,reference.doi_url])  
+    return references_pub_obj_list 
+
+
+# recursive function to implement height-first-search on references
+# references_pub_obj_list: input list of references as publication objects
+# search_depth: current search_depth of height-first-search
+# search_depth_max: maximal search_depth for dfs
+def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):  
+    # adds next level to nodes/edges
+    for pub in references_pub_obj_list:
+        new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var)    
+
+        # If the maximum height has not yet been reached, calls function recursivly with increased height     
+        if (search_depth < search_depth_max):
+            process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var)
+
+
+
+    
+# adds a node for every publication unknown
+# adds edges for citations between publications     
+def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
+    citations_pub_obj_list = []
+    for citation in pub.citations:
+        not_in_nodes = True
+        for node in nodes:
+            # checks every citation for duplication 
+            if (citation.doi_url == node.doi_url):
+                not_in_nodes = False
+                break
+        if (not_in_nodes):
+            if (search_height < search_height_max):
+
+                #checks if its a test and chooses input function accordingly
+                if (test_var):
+                    citation_pub_obj = input_test_func(citation.doi_url)
+                else:
+                    citation_pub_obj = input(citation.doi_url)
+
+                citation_pub_obj.group = "height"
+                nodes.append(citation_pub_obj)
+                edges.append([citation_pub_obj.doi_url,pub.doi_url])
+                citations_pub_obj_list.append(citation_pub_obj)
+
+        # adds only edge if citation already exists         
+        elif [citation.doi_url,pub.doi_url] not in edges:
+            edges.append([citation.doi_url,pub.doi_url])   
+    return citations_pub_obj_list
+
+
+
+# recursive function to implement height-first-search on citations
+# citations_pub_obj_list: input list of citations as publication objects
+# search_height: current search_height of height-first-search
+# search_height_max: maximal search_height for dfs
+def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):  
+    # adds next level to nodes/edges
+    for pub in citations_pub_obj_list:
+        new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var)   
+
+        # If the maximum height has not yet been reached, calls function recursivly with increased height 
+        if (search_height < search_height_max):
+            process_citations_rec(new_citation_pub_obj_list, search_height+1, search_height_max, test_var)
+
+
+
+
+# main function to call. Needs as input:
+# doi_input_list:   input list of dois
+# search_height:    max search height to process to
+# search_depth:     max search depth to process to
+# test_var:         only needed for unit test as True, default is False
+def process_main(doi_input_list, search_height, search_depth, test_var = False):
+    # abort early on invalid input instead of continuing with bad data
+    if (len(doi_input_list) == 0):
+        print("Error, no input data")
+        return
+
+    if (search_height < 0):
+        print("Error, search_height of search must be positive")
+        return
+
+    if (search_depth < 0):
+        print("Error, search_depth of search must be positive")
+        return
+
+    # the graph is accumulated in these module-level lists
+    global nodes, edges
+    nodes = []
+    edges = []
+
+    # initializes nodes/edges from input and returns publication objects
+    # for the first level of citations and references
+    references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list, search_depth, search_height, test_var)
+
+    # recursive processing up to max depth/height
+    process_citations_rec(citations_obj_list, 1, search_height, test_var)
+    process_references_rec(references_obj_list, 1, search_depth, test_var)
+
+    # adds edges between reference group and citation group of known publications
+    complete_inner_edges(test_var)
+
+    # calls a skript to save nodes and edges of graph in .json file
+    output_to_json(nodes, edges)
+
+    # only for unit tests: return plain doi list instead of objects
+    if (test_var == True):
+        doi_nodes_list = []
+        for node in nodes:
+            doi_nodes_list.append(node.doi_url)
+        return(doi_nodes_list, edges)
+
+
+
+    
+# a function to print nodes and edges from a graph
+def print_graph(nodes, edges):
+    print("Knoten:\n")
+    for node in nodes:
+        print(node.title, "\n")
+    print("\nKanten:\n")
+    for edge in edges:
+        print(edge,"\n")
+   
+    
+# program test, because there is no connection to UI yet.
+def try_known_publications():
+    """Smoke test without UI: build and print a graph for known dois."""
+    doi_list = []
+    doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+    doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
+    #doi_list.append('https://doi.org/10.1021/acs.jcim.0c00741')
+    #doi_list.append('https://doi.org/10.1021/ci700007b')
+    #doi_list.append('https://doi.org/10.1021/acs.jcim.5b00292')
+    #url = sys.argv[1]
+
+    # process_main() only returns values in test mode; outside of it the
+    # graph lives in the module-level 'nodes'/'edges' globals, so the old
+    # 'nodes, edges = process_main(...)' unpacked None and crashed.
+    process_main(doi_list, 2, 2)
+
+    print_graph(nodes, edges)
\ No newline at end of file
diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/Processing_unittest.py
new file mode 100644
index 0000000000000000000000000000000000000000..772d57204ce3374211d1d1fd3d08d279f085aac3
--- /dev/null
+++ b/verarbeitung/Processing_unittest.py
@@ -0,0 +1,66 @@
+import unittest
+from Processing import process_main
+
+class ProcessingTest(unittest.TestCase):
+     def testCycle(self):
+         nodes, edges = process_main(['doiz1'],1,1,True)
+         self.assertCountEqual(nodes, ['doiz1', 'doiz2'])
+         self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
+
+         nodes, edges = process_main(['doiz1'],2,2,True)
+         self.assertCountEqual(nodes, ['doiz1', 'doiz2'])
+         self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
+
+    #def testBigCycle(self):
+
+    #def testEmptyHeight(self):
+
+    #def testEmptyDepth(self):
+
+     def testEmptyDepthHeight(self):
+         nodes, edges = process_main(['doi1'],0,0,True)
+         self.assertCountEqual(nodes,['doi1'])
+         self.assertCountEqual(edges, [])
+
+         nodes, edges = process_main(['doi1', 'doi2'],0,0,True)
+         self.assertCountEqual(nodes, ['doi1','doi2'])
+         self.assertCountEqual(edges, [['doi1', 'doi2']])
+
+         nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True)
+         self.assertCountEqual(nodes, ['doi1','doi2', 'doi3'])
+         self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
+
+
+     def testInnerEdges(self):
+        nodes, edges = process_main(['doi_ie1'],1,1,True)
+        self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3'])
+        self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
+     
+     def testRightHeight(self):
+          nodes, edges = process_main(['doi_h01'],1,0,True)
+          self.assertCountEqual(nodes,['doi_h01'])
+          self.assertCountEqual(edges, [])
+
+          nodes, edges = process_main(['doi_h02'],1,0,True)
+          self.assertCountEqual(nodes,['doi_h02','doi_h1'])
+          self.assertCountEqual(edges, [['doi_h1','doi_h02']])
+
+          nodes, edges = process_main(['doi_h02'],2,0,True)
+          self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2'])
+          self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
+
+     def testRightDepth(self):
+          nodes, edges = process_main(['doi_d01'],0,1,True)
+          self.assertCountEqual(nodes,['doi_d01'])
+          self.assertCountEqual(edges, [])
+
+          nodes, edges = process_main(['doi_d02'],0,1,True)
+          self.assertCountEqual(nodes,['doi_d02','doi_d1'])
+          self.assertCountEqual(edges, [['doi_d02','doi_d1']])
+
+          nodes, edges = process_main(['doi_d02'],0,2,True)
+          self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2'])
+          self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/verarbeitung/__pycache__/Processing.cpython-36.pyc b/verarbeitung/__pycache__/Processing.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eb6d8a0418a1340b746f2f664997515622356d8a
Binary files /dev/null and b/verarbeitung/__pycache__/Processing.cpython-36.pyc differ
diff --git a/verarbeitung/__pycache__/Processing.cpython-38.pyc b/verarbeitung/__pycache__/Processing.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..63ac529316c848e829cd83ef44ec749e5903bf9e
Binary files /dev/null and b/verarbeitung/__pycache__/Processing.cpython-38.pyc differ
diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..54c63251bbf3affbdd176d3d55f4956c2fc08406
Binary files /dev/null and b/verarbeitung/__pycache__/Processing.cpython-39.pyc differ
diff --git a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc b/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9ce1023e6ea54e1b04b37ad5a1fd08115d5f52a4
Binary files /dev/null and b/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc differ
diff --git a/verarbeitung/__pycache__/input_fj.cpython-36.pyc b/verarbeitung/__pycache__/input_fj.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..04312c91f0a7675651e99a2a6c10a2c9da146758
Binary files /dev/null and b/verarbeitung/__pycache__/input_fj.cpython-36.pyc differ
diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..515ab99c01a5ce78bb5bb6de554a4dae3ffe4b4b
Binary files /dev/null and b/verarbeitung/__pycache__/input_fj.cpython-38.pyc differ
diff --git a/verarbeitung/__pycache__/input_fj.cpython-39.pyc b/verarbeitung/__pycache__/input_fj.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..175f9ebbfdf5f3313196b4f10aa01dc2e8e20509
Binary files /dev/null and b/verarbeitung/__pycache__/input_fj.cpython-39.pyc differ
diff --git a/verarbeitung/__pycache__/input_test.cpython-36.pyc b/verarbeitung/__pycache__/input_test.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..85878d6d127d9d2bd5efe9130672d982bb70c5fa
Binary files /dev/null and b/verarbeitung/__pycache__/input_test.cpython-36.pyc differ
diff --git a/verarbeitung/__pycache__/input_test.cpython-38.pyc b/verarbeitung/__pycache__/input_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df395212453392e135532b12396cd4c30a92ea05
Binary files /dev/null and b/verarbeitung/__pycache__/input_test.cpython-38.pyc differ
diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..68e42fd6a47a02787524c68816a42574834931d2
Binary files /dev/null and b/verarbeitung/__pycache__/input_test.cpython-39.pyc differ
diff --git a/verarbeitung/__pycache__/json_demo.cpython-36.pyc b/verarbeitung/__pycache__/json_demo.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..04acef5f40630ee2c7b6e887e33dc740b5e16a74
Binary files /dev/null and b/verarbeitung/__pycache__/json_demo.cpython-36.pyc differ
diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a1e7ba987775a20fddaa4a8f846bb238670d6a1
Binary files /dev/null and b/verarbeitung/__pycache__/json_demo.cpython-38.pyc differ
diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4e31ce337645d5282ddab11668bc6d745735f9f8
Binary files /dev/null and b/verarbeitung/__pycache__/json_demo.cpython-39.pyc differ
diff --git a/verarbeitung/__pycache__/unittest.cpython-36.pyc b/verarbeitung/__pycache__/unittest.cpython-36.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..245eb7f9be9221daa930d9fa83c77368ba463af7
Binary files /dev/null and b/verarbeitung/__pycache__/unittest.cpython-36.pyc differ
diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..44361c4b095f1c4fb0fce1868498d0e9da32f551
--- /dev/null
+++ b/verarbeitung/input_test.py
@@ -0,0 +1,82 @@
+class Publication:
+    def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group):
+        self.doi_url = doi_url
+        self.title = title
+        self.contributors = contributors
+        self.journal = journal
+        self.publication_date = publication_date
+        if references is None:
+            self.references = []
+        else:
+            self.references = ref(references)
+        if citations is None:
+            self.citations = []
+        else: 
+            self.citations = cit(citations)
+        self.group = group
+
+
+class Citation:
+    """
+    Flat metadata record for a work that cites a publication; unlike
+    Publication it carries no nested references/citations of its own.
+    """
+    def __init__(self,doi_url, title, contributors, journal, publication_date):
+        self.doi_url = doi_url
+        self.title = title
+        self.contributors = contributors
+        self.journal = journal
+        self.publication_date = publication_date
+
+class Reference:
+    """
+    Flat metadata record for a work a publication references; structurally
+    identical to Citation but kept as a distinct type.
+    """
+    def __init__(self,doi_url, title, contributors, journal, publication_date):
+        self.doi_url = doi_url
+        self.title = title
+        self.contributors = contributors
+        self.journal = journal
+        self.publication_date = publication_date
+
+def input_test_func(pub_doi):
+    for array in list_of_arrays:
+        if pub_doi == array[0]:
+            pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7])
+            return pub
+
+
+def cit(list_doi):
+    cits = []
+    for doi_url in list_doi:
+        for array in list_of_arrays:
+            if doi_url == array[0]:
+                cits.append(Citation(array[0], array[1], array[2], array[3], array[4]))
+    return cits
+
+def ref(list_doi):
+    refs = []
+    for doi_url in list_doi:
+        for array in list_of_arrays:
+            if doi_url == array[0]:
+                refs.append(Citation(array[0], array[1], array[2], array[3], array[4]))
+    return refs
+
+
+# Stub records: each list holds the positional arguments of the Publication
+# constructor, i.e.
+# [doi_url, title, [contributors], journal, date, [reference DOIs], [citation DOIs], group]
+beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], '']
+beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], '']
+beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], '']
+
+# two publications that cite and reference each other -> cycle handling
+zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], '']
+zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], '']
+
+# fixtures for completing the edges between citation and reference groups
+inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], '']
+inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], '']
+inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], '']
+
+# chain of citations (index 6) for testing the height limit
+right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], '']
+right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], '']
+right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], '']
+right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], '']
+right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], '']
+
+# chain of references (index 5) for testing the depth limit
+right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], '']
+right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], '']
+right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], '']
+right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], '']
+right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], '']
+
+# lookup table consumed by input_test_func, cit and ref
+list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3]
diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9f618d1a2dcac13ca51a530f365d40aa226bc11
--- /dev/null
+++ b/verarbeitung/json_demo.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+import json
+from input_fj import input
+
+"""
+Functions that format the computed graph to match the interface to the output-part
+
+"""
+
+# creates a list that contains a dictionary for each node
+# the dictionaries store the values for the attributes
+def format_nodes(V):
+    list_of_node_dicts = list()
+    for node in V:
+        new_dict = dict()
+        new_dict["name"] = node.title
+        new_dict["author"] = node.contributors
+        new_dict["year"] = node.publication_date
+        new_dict["journal"] = node.journal
+        new_dict["doi"] = node.doi_url
+        new_dict["group"] = node.group
+        list_of_node_dicts.append(new_dict)
+    return list_of_node_dicts
+    
+# creates a list that contains a disctionary for each edge
+# the dictionaries contain the source as keys and the target as values   
+def format_edges(E):
+    list_of_edge_dicts = list()
+    for edge in E:
+        new_dict_2 = dict()
+        new_dict_2["source"] = edge[0]
+        new_dict_2["target"] = edge[1]
+        list_of_edge_dicts.append(new_dict_2)
+    return list_of_edge_dicts
+   
+# combine the lists of nodes and edges to a dictionary and saves it to a json file
+def output_to_json(V,E):
+    dict_of_all = dict()
+    list_of_node_dicts = format_nodes(V)
+    list_of_edge_dicts = format_edges(E)
+    dict_of_all["nodes"] = list_of_node_dicts
+    dict_of_all["links"] = list_of_edge_dicts
+    with open('json_text.json','w') as outfile:
+        json.dump(dict_of_all, outfile)
+  
+#knoten = ["doi1", "doi2", "doi3"]
+#kanten = [[1,2],[3,4],[5,6]]
+#output_to_json(knoten,kanten)
+
diff --git "a/verarbeitung/n\303\266tige Tests.txt" "b/verarbeitung/n\303\266tige Tests.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..95563280436fbf6b9b8702dffef6f32e213f5a16
--- /dev/null
+++ "b/verarbeitung/n\303\266tige Tests.txt"	
@@ -0,0 +1,4 @@
+Zyklus
+großer Zyklus
+Innere Kanten vervollständigen
+