Skip to content
Snippets Groups Projects
Commit 5b246d2c authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

Aufraeumen

parent 396593b9
No related branches found
No related tags found
2 merge requests!7Main,!5Main
...@@ -10,12 +10,13 @@ import requests as req ...@@ -10,12 +10,13 @@ import requests as req
import sys import sys
from pathlib import Path from pathlib import Path
from input_fj import input from input_fj import input
from json_demo import output_to_json
def process_main(array, depth): def process_main(doi_input_array, depth):
# ERROR-Handling doi_array = NULL # ERROR-Handling doi_array = NULL
if (len(array) == 0): if (len(doi_input_array) == 0):
print("Error, no input data") print("Error, no input data")
# ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird # ERROR- wenn für die Tiefe eine negative Zahl eingegeben wird
...@@ -30,14 +31,21 @@ def process_main(array, depth): ...@@ -30,14 +31,21 @@ def process_main(array, depth):
edges = [] edges = []
# Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt. # Jede Publikation aus dem Input-Array wird in den Knoten-Array(nodes) eingefügt.
for pub in array: for pub_doi in doi_input_array:
if (pub not in nodes): pub = input(pub_doi)
not_in_nodes = True
for node in nodes:
if (pub.doi_url == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
nodes.append(pub) nodes.append(pub)
else: else:
array.remove(pub) doi_input_array.remove(pub_doi)
process_rec_depth(array, 0, depth) process_rec_depth(doi_input_array, 0, depth)
output_to_json(nodes,edges)
return(nodes,edges) return(nodes,edges)
...@@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max): ...@@ -56,9 +64,14 @@ def process_rec_depth(array, depth, depth_max):
# Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe # Wenn die citation noch nicht im Knoten-Array(nodes) existiert UND die maximale Tiefe
# noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich # noch nicht erreicht wurde, wird diese als Knoten im Knoten-Array gespeichert. Zusätzlich
# wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert. # wird die Verbindung zur Publikation als Tupel im Kanten-Array(edges) gespeichert.
if (citation.doi_url not in nodes): not_in_nodes = True
for node in nodes:
if (citation.doi_url == node.doi_url):
not_in_nodes = False
break
if (not_in_nodes):
if (depth <= depth_max): if (depth <= depth_max):
nodes.append(citation.doi_url) nodes.append(citation)
edges.append([pub.doi_url,citation.doi_url]) edges.append([pub.doi_url,citation.doi_url])
# Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation # Wenn die citaion bereits im Knoten-Array existiert, wird nur die Verbindung zur Publikation
...@@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max): ...@@ -85,8 +98,8 @@ def process_rec_depth(array, depth, depth_max):
# Programmtest, weil noch keine Verbindung zum Input besteht. # Programmtest, weil noch keine Verbindung zum Input besteht.
arr = [] arr = []
arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332') arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741') #arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/ci700007b') #arr.append('https://doi.org/10.1021/ci700007b')
...@@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') ...@@ -97,8 +110,8 @@ arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
nodes,edges = process_main(arr,1) nodes,edges = process_main(arr,1)
print("Knoten:\n") print("Knoten:\n")
for vortex in nodes: for node in nodes:
print(vortex, "\n") print(node.title, "\n")
print("\nKanten:\n") print("\nKanten:\n")
for edge in edges: for edge in edges:
print(edge,"\n") print(edge,"\n")
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 3 16:54:43 2021
@author: Malte Schokolowski
"""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
import input_test as inp
def process_main(array, depth):
    """Build the citation graph for the given DOI list.

    Resets and fills the module-level node list V (publication objects
    returned by ``inp.input``) and edge list E, then walks the citation
    lists up to ``depth`` levels via process_rec.

    Returns the (V, E) tuple.
    """
    # A negative depth makes no sense; warn but keep the original
    # best-effort behaviour of continuing anyway.
    if depth < 0:
        print("Error, depth of search must be positive")

    # Reset the shared node/edge containers for this run.
    global V, E
    V = []
    E = []

    # Seed the node list with one fetched publication per input DOI.
    for doi in array:
        V.append(inp.input(doi))

    process_rec(array, 0, depth)
    return V, E
def process_rec(array, depth, depth_max):
    """Recursively walk the citations of each DOI in *array*.

    Fetches each publication via ``inp.input``, appends its citing
    publications to the global node list V and [publication, citation]
    pairs to the global edge list E, and recurses until depth_max.

    NOTE(review): membership is tested with ``citation not in V``
    although V holds publication objects and the loop yields citation
    objects — unless their equality is defined across types, duplicates
    are probably never detected; confirm against the class definitions.
    """
    depth += 1
    for pub_doi in array:
        # Fetch and store the publication record for this DOI.
        pub = inp.input(pub_doi)
        # The class instance consists of, among other things:
        # name, authors, DOI, year,
        # what_we_cited, where_we_were_cited
        for citation in pub._citations:
            #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            # Create node j if it is still unknown.
            if (citation not in V):
                if (depth <= depth_max):
                    V.append(citation)
                    #print(citation.doi_url, "\n")
                    E.append([pub,citation])
                    #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            else:
                E.append([pub,citation]) # Create the edge when the citation is already known, i.e. both are in the input or on a cycle
                #print(pub.doi_url, ".\t", citation.doi_url, "\n")
        #for k in wo_wir_zitiert_wurden:
            #if (i != k):
                # Create node k if still unknown.
                # Create an edge from k to i.
        if (depth < depth_max):
            cit_arr = []
            for citation in pub._citations:
                # Presumably only ACS-hosted DOIs can be parsed by the
                # input module — TODO confirm.
                if ("acs" in citation.doi_url):
                    cit_arr.append(citation.doi_url)
            process_rec(cit_arr, depth, depth_max)
# Return the node and edge sets, e.g.:
# {1,2,3,4,5} or
# {{1="paper1",0}, {2 = "paper2"},1} or
# {1="paper1", 2 = "paper2"}
# {(1,2),(2,3),(2,4)}

# Ad-hoc test driver: build a small DOI list and print the resulting graph.
arr = []
arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/acs.accounts.1c00440')
#arr.append('https://doi.org/10.1021/ci700007b')
#arr.append('https://doi.org/10.1021/acs.jcim.5b00292')
#url = sys.argv[1]
#arr.append[url]

# Build the graph to depth 1, then print every node's DOI and every edge
# as a source/target DOI pair.
V,E = process_main(arr,1)
for vortex in V:
    #print(vortex, "\n")
    print(vortex.doi_url, "\n")
print("\n")
for i in range(len(E)):
    #print(edge,"\n")
    print(E[i][0].doi_url, ", ",E[i][1].doi_url, "\n")
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 3 16:54:43 2021
@author: Malte Schokolowski
"""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
import input_test as inp
def process_main(array, depth):
    """Build a DOI-string citation graph for the given input list.

    V (nodes, DOI strings) and E (edges, [source_doi, target_doi] pairs)
    are module-level globals reset on every call; the recursive walk of
    the citation lists is delegated to process_rec.

    Returns the (V, E) tuple.
    """
    # Warn on a nonsensical depth but keep going, matching the original
    # best-effort behaviour.
    if depth < 0:
        print("Error, depth of search must be positive")

    global V, E
    # Start the node list with a copy of all input DOIs; no edges yet.
    V = list(array)
    E = []

    process_rec(array, 0, depth)
    return V, E
def process_rec(array, depth, depth_max):
    """Recursively walk the citations of each DOI string in *array*.

    Fetches each publication via ``inp.input``, appends unknown citing
    DOIs to the global node list V and [source_doi, target_doi] pairs to
    the global edge list E, descending until depth_max is reached.
    """
    depth += 1
    for pub_doi in array:
        # Fetch and store the publication record for this DOI.
        #print(pub_doi)
        pub = inp.input(pub_doi)
        #for cit in pub._citations:
            #print(pub.doi_url, cit.doi_url)
        # The class instance consists of, among other things:
        # name, authors, DOI, year,
        # what_we_cited, where_we_were_cited
        for citation in pub._citations:
            #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            # Create node j if it is still unknown.
            if (citation.doi_url not in V):
                if (depth <= depth_max):
                    V.append(citation.doi_url)
                    #print(citation.doi_url, "\n")
                    E.append([pub.doi_url,citation.doi_url])
                    #print(pub.doi_url, ".\t", citation.doi_url, "\n")
            else:
                E.append([pub.doi_url,citation.doi_url]) # Create the edge when the citation is already known, i.e. both are at the same depth or on a cycle
                #print(pub.doi_url, ".\t", citation.doi_url, "\n")
        #for k in wo_wir_zitiert_wurden:
            #if (i != k):
                # Create node k if still unknown.
                # Create an edge from k to i.
        if (depth < depth_max):
            cit_arr = []
            for citation in pub._citations:
                # Presumably only ACS-hosted DOIs can be parsed by the
                # input module — TODO confirm.
                if ("acs" in citation.doi_url):
                    cit_arr.append(citation.doi_url)
            process_rec(cit_arr, depth, depth_max)
    #else:
    #print("--- %s seconds ---" % (time.time() - start_time))
    #process_rec(wo_wir_zitiert_wurden, depth -1)'''
# Return the node and edge sets, e.g.:
# {1,2,3,4,5} or
# {{1="paper1",0}, {2 = "paper2"},1} or
# {1="paper1", 2 = "paper2"}
# {(1,2),(2,3),(2,4)}

# Ad-hoc test driver: build a small DOI list and print the resulting graph.
arr = []
arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
#arr.append('https://doi.org/10.1021/ci700007b')
#arr.append('https://doi.org/10.1021/acs.jcim.5b00292')
#url = sys.argv[1]
#arr.append[url]

# Build the graph to depth 2, then print every node (DOI string) and
# every edge ([source_doi, target_doi] pair).
V,E = process_main(arr,2)
for vortex in V:
    print(vortex, "\n")
print("\n")
for edge in E:
    print(edge,"\n")
\ No newline at end of file
#!/usr/bin/env python3
"""
Functions for information retrieval of articles from the ACS journal JCIM
"""
__author__ = "Florian Jochens"
__email__ = "fj@andaco.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
from pathlib import Path
class Publication:
    """Metadata record for a scraped ACS article.

    ``_citations`` holds Citation objects for the articles that cite
    this publication; it is populated by get_citation_info.
    """
    #_registry = []

    def __init__(self, title, publication_date, contributors, doi_url,
                 subjects, num_citations):
        """Store the scraped article metadata.

        num_citations may be an int (0) or the scraped count as a string;
        callers convert with int() before use.
        """
        #self._registry.append(self)
        self.title = title
        self.publication_date = publication_date
        self.contributors = contributors
        self.doi_url = doi_url
        self.subjects = subjects
        self.num_citations = num_citations
        # Fix: _citations used to be a mutable CLASS attribute, so every
        # instance shared one list; make it a fresh per-instance list.
        self._citations = []
class Citation:
    """Lightweight record for one article that cites a publication."""

    def __init__(self, title, journal, contributors, doi_url):
        """Store the citation metadata verbatim as scraped."""
        self.title = title
        self.journal = journal
        self.contributors = contributors
        self.doi_url = doi_url

    def __repr__(self):
        # Added for debuggability; does not affect existing callers.
        return f'Citation(title={self.title!r}, doi_url={self.doi_url!r})'
def get_article_info(soup):
    """Extract article metadata from a parsed ACS article page.

    Parameters:
        soup: BeautifulSoup tree of the article HTML.

    Returns:
        A Publication with title, date, DOI, subjects, contributors and
        the 'cited by' count (0 or the scraped count string).

    NOTE(review): assumes the ACS page layout with the CSS classes used
    below; a missing header div raises AttributeError, and ``doi_url``
    stays unbound when the doi div has no children — confirm against
    live pages.
    """
    header = soup.find('div', class_ = 'article_header-left pull-left')
    article_title = header.find('span', class_ = 'hlFld-Title').text
    publication_date = header.find('span', class_ = 'pub-date-value').text
    # The last link inside the DOI div wins.
    for link in header.find('div', class_ = 'article_header-doiurl'):
        doi_url = link.get('href')
    subs = header.find('div', class_ = 'article_header-taxonomy')
    subjects = []
    for sub in subs.find_all('a'):
        subjects.append(sub.get('title'))
    cons = header.find('ul', class_ = 'loa')
    contributors = []
    for con in cons.find_all('span', class_ = 'hlFld-ContribAuthor'):
        contributors.append(con.text)
    numc = header.find('div', class_ = 'articleMetrics_count')
    # No anchor in the metrics div means the article has no citations.
    if not numc.a:
        num_citations = 0
    else:
        num_citations = numc.a.text
    pub = Publication(article_title, publication_date, contributors, doi_url,
                      subjects, num_citations)
    return pub
def get_download_url(soup):
    """Return the ACS citation-export download URL found on the page.

    Fix: the original took no parameters and referenced an undefined
    global ``soup``, so every call raised NameError; the parsed page is
    now taken as a parameter.

    Parameters:
        soup: BeautifulSoup tree of the article HTML.

    Returns:
        The absolute download URL (base URL + the 'Citation and
        references' link href, when present).
    """
    export = soup.find('div', class_ = 'cit-download-dropdown_content')
    url = 'https://pubs.acs.org'
    for link in export.find_all('a'):
        if link.get('title') == 'Citation and references':
            url += link.get('href')
    print(url)
    return url
def download(url): # Download citation and references file
    """Report whether the citation/references file named in *url*
    already exists under ./files/.

    Fix: the original tested ``if url.find('='):`` — ``find`` returns an
    index, so -1 (separator absent) is truthy and index 0 is falsy; with
    no '=' present ``rsplit('=', 1)[1]`` then raised IndexError.  Test
    for the separator explicitly instead.

    Parameters:
        url: download URL whose trailing '=...' part names the file.
    """
    if '=' in url:
        # The filename is everything after the last '='.
        filename = url.rsplit('=', 1)[1]
        path = Path('./files/' + filename)
        if path.is_file():
            print("File already exists")
        else:
            print("File does not exist")
def get_citation_info(pub, num_citations, soup):
    """Scrape the 'cited by' list from *soup* and attach Citation
    objects to ``pub._citations``.

    Parameters:
        pub: Publication to receive the citations (mutated in place).
        num_citations: reported citation count (int or numeric string).
        soup: BeautifulSoup tree of the article HTML.

    Fix: the loop is now bounded by the number of items actually
    scraped — the reported count can exceed the scraped list lengths
    (e.g. extra/missing markup), which made the original raise
    IndexError.
    """
    pub._citations = []
    details = soup.find('ol', class_ = 'cited-content_cbyCitation')
    titles = []
    for title in details.find_all('span',
            class_ = 'cited-content_cbyCitation_article-title'):
        titles.append(title.text.replace('.', ''))
    journal_names = []
    for name in details.find_all('span',
            class_ = 'cited-content_cbyCitation_journal-name'):
        journal_names.append(name.text)
    doi_urls = []
    # NOTE(review): this collects every <a> href in the list, which
    # presumably matches one link per citation — confirm on live pages.
    for url in details.find_all('a'):
        doi_urls.append(url.get('href'))
    contributors = []
    for contrib in details.find_all('span',
            class_ = 'cited-content_cbyCitation_article-contributors'):
        contributors.append(contrib.text)
    # Never index past the shortest scraped list.
    count = min(int(num_citations), len(titles), len(journal_names),
                len(doi_urls), len(contributors))
    for i in range(count):
        pub._citations.append(Citation(titles[i], journal_names[i],
                                       contributors[i], doi_urls[i]))
def print_pub_info(pub):
    """Print a human-readable summary of *pub* and the publications
    that cite it."""
    print(f'''Article title: {pub.title}
Publication date: {pub.publication_date}
DOI-URL: {pub.doi_url}
Subjects:''')
    print(*(pub.subjects), sep = ", ")
    print('\nContributors:')
    print(*(pub.contributors), sep = ", ")
    cited_count = int(pub.num_citations)
    # Uncited article: short notice and done.
    if cited_count <= 0:
        print('\nThis publication is not cited by any other publication.')
        return
    # Singular/plural header, then one block per citing article.
    if cited_count == 1:
        print(f'\nThis publication is cited by the following publication:\n')
    else:
        print(f'\nThis publication is cited by the following {pub.num_citations} publications:\n')
    for citation in pub._citations:
        print(f'''
Title: {citation.title}
Journal: {citation.journal}
Contributors: {citation.contributors}
DOI-URL: {citation.doi_url}
''')
def input(url):
    """Fetch the article page at *url* and return a populated Publication.

    Performs a network request on every call, parses the HTML with
    BeautifulSoup, and scrapes the 'cited by' list when the page reports
    citations.

    NOTE(review): the name shadows the ``input`` builtin — callers in
    this project import it explicitly, so it is kept for compatibility.
    """
    html_text = req.get(url).text
    soup = bs(html_text, 'html.parser')
    pub = get_article_info(soup)
    # Only scrape the 'cited by' section when citations are reported.
    if int(pub.num_citations) > 0:
        get_citation_info(pub, int(pub.num_citations), soup)
    return pub
#if len(sys.argv) != 2:
# sys.stderr.write('Usage: {} <url>\n'.format(sys.argv[0]))
# exit(1)
#url = sys.argv[1]
#pub = input(url)
#print_pub_info(pub)
{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "doi": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems ", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"name": "Disconnected Maximum Common Substructures under Constraints ", "doi": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design ", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms ", "doi": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"name": "Machine learning accelerates quantum mechanics predictions of molecular crystals ", "doi": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID ", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies ", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"name": "BonMOLi\u00e8re: Small-Sized Libraries of Readily Purchasable Compounds, Optimized to Produce Genuine Hits in Biological Screens across the Protein Space ", "doi": "https://doi.org/10.3390/ijms22157773"}, {"name": "Accelerating high-throughput virtual screening through molecular pool-based active learning ", "doi": "https://doi.org/10.1039/D0SC06805E"}, {"name": "Compound Screening ", "doi": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.0c00741"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", 
"target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1016/j.physrep.2021.08.002"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acsmedchemlett.1c00251"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jmedchem.1c00416"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.3390/ijms22157773"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1039/D0SC06805E"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1016/B978-0-12-820472-6.00078-5"}]}
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
@author: Malte Schokolowski
"""
def process_main(doi_array, depth):
    """Design sketch: entry point that will build the citation graph.

    TODO: error handling for doi_array == NULL and for depth < 0 or 1 ???
    """
    # Create an empty array for the nodes.
    # Create an empty array for the edges.
    # Insert all start nodes into the node array.
    process_rec(doi_array, depth)
def process_rec(doi_array, depth):
    """Design sketch of the recursive graph walk (no-op for now).

    Fix: the original was not valid Python — every loop body contained
    only comments, which raises IndentationError at import time.  The
    intended steps are preserved as comments and the function is a
    syntactically valid stub until the citation lists exist.
    """
    for i in range(len(doi_array)):
        # Call input for each DOI and store the publication record:
        # a class instance consisting of, among other things,
        # name, authors, DOI, year,
        # what_we_cited (outgoing) and where_we_were_cited (incoming).
        pass
        # for j in range(len(what_we_cited)):
        #     create node j if still unknown
        #     create edge from i to j
        # for k in range(len(where_we_were_cited)):
        #     if (i != k):
        #         create node k if still unknown
        #         create edge from k to i
        # process_rec(what_we_cited, depth-1)
        # process_rec(where_we_were_cited, depth-1)
# Knoten- und Kantenmenge zurückgeben
# {1,2,3,4,5} oder
# {{1="paper1",0}, {2 = "paper2"},1} oder
# {1="paper1", 2 = "paper2"}
# {(1,2),(2,3),(2,4)}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment