From 9e5dae7ece09816b4e6b4743245b6102deec848e Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Tue, 21 Dec 2021 14:07:00 +0100 Subject: [PATCH] bug fixes --- verarbeitung/construct_new_graph/export_to_json.py | 2 +- verarbeitung/construct_new_graph/initialize_graph.py | 12 ++++++++---- verarbeitung/test/construct_graph_unittest.py | 12 ++++++------ verarbeitung/test_output.json | 2 +- verarbeitung/update_graph/connect_new_input.py | 5 +++-- verarbeitung/update_graph/delete_nodes_edges.py | 8 ++++---- verarbeitung/update_graph/update_graph.py | 2 +- 7 files changed, 24 insertions(+), 19 deletions(-) diff --git a/verarbeitung/construct_new_graph/export_to_json.py b/verarbeitung/construct_new_graph/export_to_json.py index c69a61b..68a25be 100644 --- a/verarbeitung/construct_new_graph/export_to_json.py +++ b/verarbeitung/construct_new_graph/export_to_json.py @@ -78,7 +78,7 @@ def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False) list_of_edge_dicts = format_edges(edges) dict_of_all["nodes"] = list_of_node_dicts dict_of_all["links"] = list_of_edge_dicts - if (test_var and json_file == 'json_text.json'): + if (test_var): with open('test_output.json','w') as outfile: json.dump(dict_of_all, outfile) else: diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index 27cf455..b6bdeee 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -96,7 +96,7 @@ def complete_inner_edges(): edges.append([node.doi_url,reference.doi_url]) -def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, input_nodes = [], input_edges = []): +def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, update_var = False, input_nodes = [], input_edges = []): ''' :param doi_input_list: input list of doi from UI :type doi_input_list: List[String] @@ -128,8 +128,12 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va # creates empty lists to save nodes and edges global nodes, edges - nodes = input_nodes - edges = input_edges + if update_var: + nodes = input_nodes + edges = input_edges + else: + nodes = [] + edges = [] # initializes nodes/edges from input and gets a list with publication objects for citations and references returned references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) @@ -142,6 +146,6 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va complete_inner_edges() # calls a skript to save nodes and edges of graph in .json file - #output_to_json(nodes, edges, test_var) + output_to_json(nodes, edges, test_var = test_var) return(nodes,edges) diff --git a/verarbeitung/test/construct_graph_unittest.py b/verarbeitung/test/construct_graph_unittest.py index 328bb43..383f306 100644 --- a/verarbeitung/test/construct_graph_unittest.py +++ b/verarbeitung/test/construct_graph_unittest.py @@ -52,33 +52,33 @@ class ConstructionTest(unittest.TestCase): self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) def testRightHeight(self): - nodes, edges = init_graph_construction(['doi_h01'],1,0,True) + nodes, edges = init_graph_construction(['doi_h01'],0,1,True) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_h01']) self.assertCountEqual(edges, []) - nodes, edges = init_graph_construction(['doi_h02'],1,0,True) + nodes, edges = init_graph_construction(['doi_h02'],0,1,True) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - nodes, edges = init_graph_construction(['doi_h02'],2,0,True) + nodes, edges = init_graph_construction(['doi_h02'],0,2,True) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) def testRightDepth(self): - nodes, edges = init_graph_construction(['doi_d01'],0,1,True) + nodes, edges = init_graph_construction(['doi_d01'],1,0,True) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_d01']) self.assertCountEqual(edges, []) - nodes, edges = init_graph_construction(['doi_d02'],0,1,True) + nodes, edges = init_graph_construction(['doi_d02'],1,0,True) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - nodes, edges = init_graph_construction(['doi_d02'],0,2,True) + nodes, edges = init_graph_construction(['doi_d02'],2,0,True) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json index 840e19d..f96362a 100644 --- a/verarbeitung/test_output.json +++ b/verarbeitung/test_output.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_i", "name": "title_lg_2_i", "author": ["contributor_lg_2_i"], "year": "date_lg_2_i", "journal": "journal_lg_2_i", "group": "Input", "depth": 0, "citations": 4}, {"doi": "doi_lg_2_d11", "name": "title_lg_2_d11", "author": ["contributor_lg_2_d11"], "year": "date_lg_2_d11", "journal": "journal_lg_2_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_2_d12", "name": "title_lg_2_d12", "author": ["contributor_lg_2_d12"], "year": "date_lg_2_d12", "journal": "journal_lg_2_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_2_h11", "name": "title_lg_2_h11", "author": ["contributor_lg_2_h11"], "year": "date_lg_2_h11", "journal": "journal_lg_2_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_h12", "name": "title_lg_2_h12", "author": ["contributor_lg_2_h12"], "year": "date_lg_2_h12", "journal": "journal_lg_2_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_cg_i", "name": "title_cg_i", "author": ["contributor_cg_i"], "year": "date_cg_i", "journal": "journal_cg_i", "group": "Citedby", "depth": 1, "citations": 3}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h21", "name": "title_lg_2_h21", "author": ["contributor_lg_2_h21"], "year": "date_lg_2_h21", "journal": "journal_lg_2_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h22", "name": "title_lg_2_h22", "author": ["contributor_lg_2_h22"], "year": "date_lg_2_h22", "journal": "journal_lg_2_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h23", "name": "title_lg_2_h23", "author": ["contributor_lg_2_h23"], "year": "date_lg_2_h23", "journal": "journal_lg_2_h23", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_lg_2_h24", "name": "title_lg_2_h24", "author": ["contributor_lg_2_h24"], "year": "date_lg_2_h24", "journal": "journal_lg_2_h24", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_cg_h11", "name": "title_cg_h11", "author": ["contributor_cg_h11"], "year": "date_cg_h11", "journal": "journal_cg_h11", "group": "Citedby", "depth": 2, "citations": 2}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_2_d21", "name": "title_lg_2_d21", "author": ["contributor_lg_2_d21"], "year": "date_lg_2_d21", "journal": "journal_lg_2_d21", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d22", "name": "title_lg_2_d22", "author": ["contributor_lg_2_d22"], "year": "date_lg_2_d22", "journal": "journal_lg_2_d22", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d23", "name": "title_lg_2_d23", "author": ["contributor_lg_2_d23"], "year": "date_lg_2_d23", "journal": "journal_lg_2_d23", "group": "Reference", "depth": -2, "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d11"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d12"}, {"source": "doi_lg_2_h11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_h12", "target": "doi_lg_2_i"}, {"source": "doi_cg_i", "target": "doi_lg_2_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h21", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h22", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h23"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h24"}, {"source": "doi_lg_1_h23", "target": "doi_cg_i"}, {"source": "doi_cg_h11", "target": "doi_cg_i"}, {"source": "doi_lg_2_h11", "target": "doi_cg_i"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_2_d11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_d11", "target": "doi_lg_2_d21"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d22"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_d12"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index b87b856..a7e7fda 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -48,7 +48,8 @@ def find_furthermost_citations(node, old_search_depth, cit_type): for new_cit in get_cit_type_list(cit_node, cit_type): for new_cit_node in nodes: if new_cit.doi_url == new_cit_node.doi_url: - new_citations.append(node) + new_cit_node.group -= node.group + new_citations.append(new_cit_node) citations_saved = new_citations # returns the references/citations which needs to be processed to complete contruction @@ -149,5 +150,5 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes #add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var) #add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var) - init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, input_nodes = nodes, input_edges = edges) + init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges) diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py index 7145fd7..53e826e 100644 --- a/verarbeitung/update_graph/delete_nodes_edges.py +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -90,7 +90,7 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): search_ref_graph_rec(pub) search_cit_graph_rec(pub) - valid_edges = back_to_valid_edges(old_edges_list, usable_nodes) - - - return(usable_nodes, valid_edges) \ No newline at end of file + back_to_valid_edges(old_edges_list, usable_nodes) + input_list = usable_nodes.copy() + print(input_list) + #return(usable_nodes, valid_edges) diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 169e17d..90114c3 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -102,7 +102,7 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): - processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list) + delete_nodes_and_edges(processed_list, common_nodes, valid_edges) old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var) -- GitLab