Große, Judith
--- a/verarbeitung/Processing_test_doi_überarbeitet.py 0 → 100644

+ 105

− 0
+++ b/verarbeitung/Processing_test_doi_überarbeitet.py 0 → 100644

+ 105

− 0
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov  3 16:54:43 2021
+
+@author: Malte Schokolowski
+"""
+
+from bs4 import BeautifulSoup as bs
+import requests as req
+import sys  
+from pathlib import Path
+from input_fj import input
+
+
+
+def process_main(array, depth):
+    """Build the citation graph for a list of publications.
+
+    Resets the module-level ``nodes`` and ``edges`` lists, seeds ``nodes``
+    with the distinct input publications and then lets
+    ``process_rec_depth`` add citations and edges.
+
+    Args:
+        array: List of publication DOI URLs to start from. The list itself
+            is not modified; duplicate entries are ignored.
+        depth: Maximum search depth; must not be negative.
+
+    Returns:
+        Tuple ``(nodes, edges)``. ``nodes`` is a list of DOI URLs and
+        ``edges`` a list of two-element lists ``[publication, citation]``.
+        Both lists are empty if the input is invalid (no publications or
+        a negative depth).
+    """
+    # Fresh, empty node and edge lists for every run.
+    global nodes, edges
+    nodes = []
+    edges = []
+
+    # Invalid input: report it and return the empty graph instead of
+    # carrying on and fetching publications with unusable parameters.
+    if not array:
+        print("Error, no input data")
+        return (nodes, edges)
+    if depth < 0:
+        print("Error, depth of search must be positive")
+        return (nodes, edges)
+
+    # Add every input publication to the node list exactly once, keeping
+    # the input order. The input list is deliberately not modified while
+    # it is iterated: removing elements during iteration skips the element
+    # that follows each duplicate.
+    for pub in array:
+        if pub not in nodes:
+            nodes.append(pub)
+
+    # Pass a copy, because process_rec_depth appends to ``nodes`` while it
+    # iterates over its argument.
+    process_rec_depth(list(nodes), 0, depth)
+
+    return (nodes, edges)
+    
+    
+def process_rec_depth(array, depth, depth_max):
+    """Recursively add the citations of the given publications to the graph.
+
+    Works on the module-level ``nodes`` and ``edges`` lists, which must
+    already exist (``process_main`` creates them).
+
+    Args:
+        array: List of publication DOI URLs to process on this level.
+        depth: Depth of the calling level; it is increased by one on entry.
+        depth_max: Maximum depth. New nodes are only added while the
+            current depth does not exceed it, and the recursion only
+            continues while the current depth is below it.
+    """
+    # The depth grows by one with every recursive call.
+    depth += 1
+
+    for pub_doi in array:
+        # ``input`` is the publication class imported from input_fj at the
+        # top of the file, not the builtin of the same name.
+        pub = input(pub_doi)
+
+        for citation in pub._citations:
+            edge = [pub.doi_url, citation.doi_url]
+
+            if citation.doi_url not in nodes:
+                # Unknown citation: it becomes a node (together with its
+                # edge) only while the maximum depth is not exceeded.
+                if depth <= depth_max:
+                    nodes.append(citation.doi_url)
+                    edges.append(edge)
+            elif edge not in edges:
+                # Known node: only the connection is recorded, and only
+                # once. A publication that is reached via several paths is
+                # processed more than once and would otherwise add the
+                # same edges again.
+                edges.append(edge)
+
+        # While the maximum depth is not reached, continue with the
+        # citations of this publication.
+        if depth < depth_max:
+            # Currently only citations with "acs" in the URL are followed,
+            # because the information cannot be extracted from other
+            # sources yet.
+            cit_arr = [
+                citation.doi_url
+                for citation in pub._citations
+                if "acs" in citation.doi_url
+            ]
+
+            # Recursive call for the next level.
+            process_rec_depth(cit_arr, depth, depth_max)
+            
+ 
+    
+# Test run of the module, because there is no connection to the input
+# component yet.
+arr = ['https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249']
+
+# Further test inputs that can be enabled when needed:
+#arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+#arr.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
+#arr.append('https://doi.org/10.1021/acs.jcim.0c00741')
+#arr.append('https://doi.org/10.1021/ci700007b')
+#arr.append('https://doi.org/10.1021/acs.jcim.5b00292')
+
+# Disabled: take the start URL from the command line instead.
+#url = sys.argv[1]
+#arr.append[url]
+
+nodes, edges = process_main(arr, 1)
+
+# Print every node, then every edge, each followed by an empty line.
+print("Knoten:\n")
+for node in nodes:
+    print(node, "\n")
+
+print("\nKanten:\n")
+for connection in edges:
+    print(connection, "\n")
+\ No newline at end of file