From 4588d4d0cabcfb4def262dcdc78bd3eb6bab402a Mon Sep 17 00:00:00 2001
From: Malte Schokolowski <baw8441@uni-hamburg.de>
Date: Wed, 19 Jan 2022 14:22:27 +0100
Subject: [PATCH] verarbeitung: bug fix in update_graph and connect_new_input

---
 .../construct_new_graph/add_citations_rec.py  |  2 +-
 verarbeitung/start_script.py                  |  7 ++-
 .../update_graph/connect_new_input.py         | 57 ++++++++++---------
 verarbeitung/update_graph/update_graph.py     |  5 +-
 4 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py
index c5f4dd0..dee665c 100644
--- a/verarbeitung/construct_new_graph/add_citations_rec.py
+++ b/verarbeitung/construct_new_graph/add_citations_rec.py
@@ -46,7 +46,7 @@ def get_cit_type_list(pub, cit_type):
         :param cit_type:            variable to differenciate citation and reference call
         :type cit_type:             String
 
-        function to create nodes and edges and call create_graph_structure_citations
+        function to return citation or reference list for given pub
     '''
     if cit_type == "Citation":
         return(pub.citations)
diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py
index 4cc48e4..43e9bcc 100644
--- a/verarbeitung/start_script.py
+++ b/verarbeitung/start_script.py
@@ -6,13 +6,16 @@ from verarbeitung.process_main import Processing
 
 
 doi_list = []
-doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
 #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
 #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
 #doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
 #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709')
 #doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728')
-error_list = Processing(doi_list, 2, 2, 'test.json')
+#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.chemrestox.0c00006')
+doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728')
+doi_list.append('https://doi.org/10.1021/acs.jpclett.1c03335')
+error_list = Processing(doi_list, 2, 2, 'test728.json')
 print(error_list)
 
 del doi_list
diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py
index af7363a..2689efd 100644
--- a/verarbeitung/update_graph/connect_new_input.py
+++ b/verarbeitung/update_graph/connect_new_input.py
@@ -19,16 +19,19 @@ from os import error
 
 sys.path.append("../")
 
+from input.publication import Publication
+from verarbeitung.get_pub_from_input import get_pub
 from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
 from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
 
 
-def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, cit_type):
+
+def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type):
     global nodes, edges
     nodes = test_nodes
     edges = test_edges
 
-    return(find_furthermost_citations(nodes, edges, changed_node, old_search_depth, cit_type))
+    return(find_furthermost_citations(nodes, edges, changed_node, old_search_depth, new_search_depth, cit_type))
 
 def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height):
     global nodes, edges
@@ -40,7 +43,7 @@ def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_node
 
 
 
-def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, cit_type):
+def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new_search_depth, cit_type):
     '''
         :param new_nodes:           list of nodes which are generated seperately from main node list to avoid recursive problems
         :type new_nodes             List[Publication]
@@ -64,7 +67,7 @@ def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, cit
     citations_saved = [node]
 
     # group of node and old search depth/height determines how often the loop needs to be repeated
-    for depth in range(old_search_depth - abs(node.group)):
+    for depth in range(min(old_search_depth - abs(node.group), new_search_depth)):
         new_citations = []
         for citation in citations_saved:
             for cit_node in nodes:
@@ -147,36 +150,38 @@ def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_he
         
         # moves known reference node to input and completes citations and references for this node
         if (node.group < 0) and (node.doi_url in inserted_nodes):
-            node.group = 0
-            new_max_citations = find_furthermost_citations(new_nodes, new_edges, node, old_search_height + abs(node.group), "Citation")
-            add_citations(new_nodes, new_edges, new_max_citations, old_search_height, new_search_height, "Citation", test_var)
-            
-            new_nodes, new_edges, error_doi_list_ref = init_graph_construction([node.doi_url], new_search_height, 0, test_var, True, new_nodes, new_edges)
-                
-            for err_node in error_doi_list_ref:
-                if err_node not in error_doi_list:
-                    error_doi_list.append(err_node)
+            pub = get_pub(node.doi_url, test_var)
+            if (type(pub) != Publication):
 
-            
+                error_doi_list.append(node.doi_url)
+                continue
+
+            pub.group = node.group
             old_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, "Reference")
-            add_citations(new_nodes, new_edges, old_max_references, old_search_depth, new_search_depth, "Reference", test_var)
+            add_citations(new_nodes, new_edges, old_max_references, min(old_search_depth - abs(node.group), new_search_depth), new_search_depth, "Reference", test_var)
+
+            add_citations(new_nodes, new_edges, [pub], 0, new_search_height, "Citation", test_var)
+            
+            pub.group = 0
+            new_nodes.append(pub)  
             handled_inserted_nodes.append(node)
             
         # moves known citation node to input and completes citations and references for this node
         elif (node.group > 0) and (node.doi_url in inserted_nodes):
-            node.group = 0
-            new_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth + abs(node.group), "Reference")
-            add_citations(new_nodes, new_edges, new_max_references, old_search_depth, new_search_depth, "Reference", test_var)
-            #new_nodes.append(new_max_references)
-            
-            new_nodes, new_edges, error_doi_list_ref = init_graph_construction([node.doi_url], new_search_depth, 0, test_var, True, new_nodes, new_edges)
-            for err_node in error_doi_list_ref:
-                if err_node not in error_doi_list:
-                    error_doi_list.append(err_node)
+            pub = get_pub(node.doi_url, test_var)
+            if (type(pub) != Publication):
+
+                error_doi_list.append(node.doi_url)
+                continue
+
+            pub.group = node.group
+            old_max_citations = find_furthermost_citations(new_nodes, new_edges, pub, old_search_height, new_search_height, "Citation")
+            add_citations(new_nodes, new_edges, old_max_citations, min(old_search_height - abs(node.group), new_search_height), new_search_height, "Citation", test_var)
 
+            add_citations(new_nodes, new_edges, [pub], 0, new_search_depth, "Reference", test_var)        
             
-            old_max_citations = find_furthermost_citations(new_nodes, new_edges, node, old_search_height, "Citation")
-            add_citations(new_nodes, new_edges, old_max_citations, old_search_height, new_search_height, "Citation", test_var)
+            pub.group = 0
+            new_nodes.append(pub)
             handled_inserted_nodes.append(node)
 
     for new_node in new_nodes:
diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py
index 416be51..1e74773 100644
--- a/verarbeitung/update_graph/update_graph.py
+++ b/verarbeitung/update_graph/update_graph.py
@@ -101,15 +101,18 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
     # retrieve which publications are already known, removed, inserted
     common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list)
 
-    old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
+    
 
     processed_list_copy = processed_list.copy()
     valid_edges_copy = valid_edges.copy()
 
+    old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
+
     # deletes publications and edges from node_list if publications can no longer be reached
     if (len(deleted_nodes) > 0):
         processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges)
     
+    
     if (len(inserted_nodes) > 0):      
         inserted_pub_nodes, inserted_edges, error_doi_list_new = connect_old_and_new_input(processed_list_copy, valid_edges_copy, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var)
         for err_node in error_doi_list_new:
-- 
GitLab