From 2b8fcc8e60ae1c02babd12574c532ad87c68a676 Mon Sep 17 00:00:00 2001
From: Malte Schokolowski <baw8441@uni-hamburg.de>
Date: Thu, 23 Dec 2021 15:05:43 +0100
Subject: [PATCH] added return of dois with error

---
 .../construct_new_graph/add_citations_rec.py  |  2 +-
 .../construct_new_graph/initialize_graph.py   |  6 ++++--
 verarbeitung/get_pub_from_input.py            |  4 +---
 verarbeitung/process_main.py                  |  6 ++++--
 verarbeitung/start_script.py                  | 19 +++++++++++++++++++
 .../update_graph/connect_new_input.py         |  3 +--
 verarbeitung/update_graph/update_graph.py     |  6 ++++--
 7 files changed, 34 insertions(+), 12 deletions(-)
 create mode 100644 verarbeitung/start_script.py

diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py
index 87badf4..95afa35 100644
--- a/verarbeitung/construct_new_graph/add_citations_rec.py
+++ b/verarbeitung/construct_new_graph/add_citations_rec.py
@@ -120,7 +120,7 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty
             if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly
                 citation_pub_obj = get_pub(citation.doi_url, test_var)
                 if (type(citation_pub_obj) != Publication):
-                    print(pub)
+                    #print(pub)
                     continue 
                 
                 if (cit_type == "Citation"):
diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py
index bfc7df2..4446453 100644
--- a/verarbeitung/construct_new_graph/initialize_graph.py
+++ b/verarbeitung/construct_new_graph/initialize_graph.py
@@ -76,6 +76,7 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
         pub = get_pub(pub_doi, test_var)
         if (type(pub) != Publication):
             #print(pub)
+            error_doi_list.append(pub_doi)
             continue       
 
         # checks if publication already exists in nodes
@@ -163,13 +164,14 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va
 
     
     # creates empty lists to save nodes and edges
-    global nodes, edges
+    global nodes, edges, error_doi_list
     if update_var:
         nodes = input_nodes
         edges = input_edges 
     else:
         nodes = []
         edges = []
+    error_doi_list = []
 
     # initializes nodes/edges from input and gets a list with publication objects for citations and references returned
     references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
@@ -188,4 +190,4 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va
     del edges
     gc.collect()
 
-    return(new_nodes,new_edges)
+    return(new_nodes,new_edges, error_doi_list)
diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py
index 2766ba8..6bdce3b 100644
--- a/verarbeitung/get_pub_from_input.py
+++ b/verarbeitung/get_pub_from_input.py
@@ -40,9 +40,7 @@ def get_pub(pub_doi, test_var):
     else: 
         inter = Input()
         try:
-            pub = inter.get_publication(pub_doi) #creates an object of class Publication
-        except AttributeError:
-            pub = inter.get_publication(pub_doi)
+            pub = inter.get_publication(pub_doi) #creates an object of class Publication
         except ValueError:
             return(ValueError)
         except IndexError:
diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py
index db17e1f..78df2db 100644
--- a/verarbeitung/process_main.py
+++ b/verarbeitung/process_main.py
@@ -45,10 +45,12 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso
     json_file = Path(json_file)
     
     if json_file.is_file():
-        nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
+        nodes, edges, error_doi_list = update_graph(url_list, json_file, search_depth, search_height)
     else:
-        nodes, edges = init_graph_construction(url_list, search_depth, search_height)
+        nodes, edges, error_doi_list = init_graph_construction(url_list, search_depth, search_height)
 
     # exports graph to given json file name
     output_to_json(nodes, edges, json_file)
+
+    return error_doi_list
     
\ No newline at end of file
diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py
new file mode 100644
index 0000000..4901706
--- /dev/null
+++ b/verarbeitung/start_script.py
@@ -0,0 +1,19 @@
+import sys  # NOTE(review): sys and Path are imported but not used in this script
+import gc
+from pathlib import Path
+from verarbeitung.process_main import Processing
+#from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes
+
+# Manual start script: passes a hard-coded DOI list to Processing() and prints the list it returns.
+doi_list = []
+#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
+#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
+doi_list.append('https://doi.org/10.1021/acs.medchem.0c01332')  # NOTE(review): presumably a deliberately unresolvable DOI to exercise the error list - confirm
+doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709')
+error_list = Processing(doi_list, 2, 2, 'test.json')  # positional args: search_depth=2, search_height=2, json_file='test.json'
+print(error_list)
+# Drop the module-level lists and run a garbage-collection pass explicitly.
+del doi_list
+del error_list
+gc.collect()
diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py
index 3560341..5acb05f 100644
--- a/verarbeitung/update_graph/connect_new_input.py
+++ b/verarbeitung/update_graph/connect_new_input.py
@@ -149,5 +149,4 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes
     #add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var)
     #add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var)
 
-    init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges)
-
+    nodes, edges, error_doi_list = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges)
diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py
index 7e3ad76..739aa71 100644
--- a/verarbeitung/update_graph/update_graph.py
+++ b/verarbeitung/update_graph/update_graph.py
@@ -60,6 +60,7 @@ def get_new_input_dois(new_input, test_var):
         pub = get_pub(new_node, test_var)
         if (type(pub) != Publication):
             #print(pub)
+            error_doi_list.append(new_node)
             continue
 
         new_input_dois.append(pub.doi_url)
@@ -87,9 +88,10 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
     old_obj_input_list , old_edges_list = input_from_json(json_file)
 
     # one global list to save the process of removing unneeded publications and one to save valid edges
-    global processed_list, valid_edges
+    global processed_list, valid_edges, error_doi_list
     processed_list = old_obj_input_list
     valid_edges = old_edges_list
+    error_doi_list = []
 
 
     # get dois from lists to compare for differences
@@ -108,4 +110,4 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
     if (len(inserted_nodes) > 0):
         connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var)
 
-    return(processed_list, valid_edges) 
+    return(processed_list, valid_edges, error_doi_list) 
-- 
GitLab