From 071b501172cdbfb7df05f4741a4303cd2a1758bf Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Wed, 12 Jan 2022 01:41:53 +0100 Subject: [PATCH] fixed bug in verarbeitung.update_graph --- .../construct_new_graph/initialize_graph.py | 6 +- verarbeitung/test/input_test.py | 2 +- verarbeitung/test/update_graph_unittest.py | 174 +++++++++++++++--- .../update_graph/connect_new_input.py | 121 ++++++++---- verarbeitung/update_graph/update_graph.py | 35 +++- 5 files changed, 272 insertions(+), 66 deletions(-) diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index 3807842..1368f6e 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -190,7 +190,5 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va del edges gc.collect() - if test_var: - return(new_nodes, new_edges) - else: - return(new_nodes, new_edges, error_doi_list) + + return(new_nodes, new_edges, error_doi_list) diff --git a/verarbeitung/test/input_test.py b/verarbeitung/test/input_test.py index a701f7e..352f983 100644 --- a/verarbeitung/test/input_test.py +++ b/verarbeitung/test/input_test.py @@ -100,5 +100,5 @@ list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_ed right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, - large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23, crossed_graph_h21, crossed_graph_h22, crossed_graph_h11, + large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23, large_graph_2_d24, crossed_graph_h21, crossed_graph_h22, crossed_graph_h11, crossed_graph_i, crossed_graph_d11, crossed_graph_d12, crossed_graph_d21, crossed_graph_d22] diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py index 765fcbf..77746ff 100644 --- a/verarbeitung/test/update_graph_unittest.py +++ b/verarbeitung/test/update_graph_unittest.py @@ -14,40 +14,74 @@ from verarbeitung.update_graph.update_depth import reduce_max_height_depth_test, from verarbeitung.update_graph.update_edges import back_to_valid_edges from verarbeitung.update_graph.delete_nodes_edges import search_ref_cit_graph_rec_test from verarbeitung.update_graph.compare_old_and_new_node_lists import compare_old_and_new_node_lists -from verarbeitung.update_graph.connect_new_input import find_furthermost_citations_test +from verarbeitung.update_graph.connect_new_input import find_furthermost_citations_test, complete_changed_group_nodes_test from verarbeitung.get_pub_from_input import input_test_func class UpdatingTest(unittest.TestCase): maxDiff = None def test_deleted_input_dois(self): - nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True) - nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) + nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) + nodes_old_both, edges_old_both, err_list = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) output_to_json(nodes_old_both, edges_old_both, test_var=True) - nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) + nodes_new_single, edges_new_single, err_list = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) self.assertCountEqual(nodes_old_single,nodes_new_single) self.assertCountEqual(edges_old_single, edges_new_single) - nodes_old_single, edges_old_single = init_graph_construction(['doi_cg_i'],3,3,True) - nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True) - nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) + nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_cg_i'],3,3,True) + nodes_old_two, edges_old_two, err_list = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True) + nodes_old_three, edges_old_three, err_list = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) def test_new_height(self): - nodes_height_0, edges_height_0 = init_graph_construction(['doi_lg_1_i'],2,0,True) - nodes_height_1, edges_height_1 = init_graph_construction(['doi_lg_1_i'],2,1,True) - nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True) + nodes_height_0, edges_height_0, err_list = init_graph_construction(['doi_lg_1_i'],2,0,True) + nodes_height_1, edges_height_1, err_list = init_graph_construction(['doi_lg_1_i'],2,1,True) + nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) - nodes_new_height_1, edges_new_height_1 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) + nodes_new_height_1, edges_new_height_1, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) self.assertCountEqual(nodes_height_1, nodes_new_height_1) self.assertCountEqual(edges_height_1, edges_new_height_1) - nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True) + nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) - nodes_new_height_0, edges_new_height_0 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) + nodes_new_height_0, edges_new_height_0, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) self.assertCountEqual(nodes_height_0, nodes_new_height_0) self.assertCountEqual(edges_height_0, edges_new_height_0) + def test_ref_to_input(self): + nodes, edges, err_list = init_graph_construction(['doi_cg_i'], 2, 2, True) + nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11'], 2, 2, True) + output_to_json(nodes, edges, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes_2) + self.assertCountEqual(new_edges, edges_2) + + nodes, edges, err_list = init_graph_construction(['doi_cg_i','doi_lg_2_i'], 2, 2, True) + nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11','doi_lg_2_i'], 2, 2, True) + + output_to_json(nodes, edges, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes_2) + self.assertCountEqual(new_edges, edges_2) + + output_to_json(nodes_2, edges_2, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes_2) + self.assertCountEqual(new_edges, edges_2) + + nodes, edges, err_list = init_graph_construction(['doi_cg_i','doi_lg_2_i'], 2, 2, True) + nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_i','doi_lg_2_h11','doi_lg_1_i'], 3, 3, True) + + output_to_json(nodes_2, edges_2, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_i','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes) + self.assertCountEqual(new_edges, edges) + + + + + + ## Ab hier die Tests für die einzelnen Funktionen ## # update_graph.py: @@ -137,7 +171,7 @@ class UpdatingTest(unittest.TestCase): # import_from_json.py: def test_input_from_json(self): - nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True) + nodes_old, edges_old, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) output_to_json(nodes_old, edges_old, test_var = True) nodes_new, edges_new = input_from_json('test_output.json') self.assertCountEqual(nodes_old,nodes_new) @@ -223,11 +257,11 @@ class UpdatingTest(unittest.TestCase): pub_lg_2_d22.group = -2 pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_lg_2_d22] - self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_h11, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) - self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_h11, 1, "Citation"),[pub_lg_2_h11]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 1, "Citation"),[pub_lg_2_h11]) - self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_d11, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) - self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_d11, 1, "Reference"),[pub_lg_2_d11]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 1, "Reference"),[pub_lg_2_d11]) def test_complete_changed_group_nodes(self): @@ -239,11 +273,109 @@ class UpdatingTest(unittest.TestCase): pub_cg_h21.group = 2 pub_cg_h22 = input_test_func('doi_cg_h22') pub_cg_h22.group = 2 - pubs = [] + pub_cg_d11 = input_test_func('doi_cg_d11') + pub_cg_d11.group = -1 + pub_cg_d12 = input_test_func('doi_cg_d12') + pub_cg_d12.group = -1 + pub_cg_d21 = input_test_func('doi_cg_d21') + pub_cg_d21.group = -2 + pub_cg_d22 = input_test_func('doi_cg_d22') + pub_cg_d22.group = -2 + pub_lg_1_h23 = input_test_func('doi_lg_1_h23') + pub_lg_1_h23.group = 2 + pub_lg_1_d23 = input_test_func('doi_lg_1_d23') + pub_lg_1_d23.group = -2 - - + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h11.group = 1 + pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h21.group = 2 + pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h22.group = 2 + pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d11.group = -1 + pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + pub_lg_2_d12.group = -1 + pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d21.group = -2 + pub_lg_2_d22 = input_test_func('doi_lg_2_d22') + pub_lg_2_d22.group = -2 + pub_lg_2_d23 = input_test_func('doi_lg_2_d23') + pub_lg_2_d23.group = -2 + pub_lg_2_d24 = input_test_func('doi_lg_2_d24') + pub_lg_2_d24.group = -2 + + + moved_1_pub_cg_i = input_test_func('doi_cg_i') + moved_1_pub_cg_i.group = 1 + moved_1_pub_cg_h11 = input_test_func('doi_cg_h11') + moved_1_pub_cg_h11.group = 2 + moved_1_pub_cg_h21 = input_test_func('doi_cg_h21') + moved_1_pub_cg_h21.group = 3 + moved_1_pub_cg_h22 = input_test_func('doi_cg_h22') + moved_1_pub_cg_h22.group = 3 + moved_1_pub_cg_d11 = input_test_func('doi_cg_d11') + moved_1_pub_cg_d11.group = 0 + + moved_1_pub_cg_d21 = input_test_func('doi_cg_d21') + moved_1_pub_cg_d21.group = -1 + + moved_1_pub_lg_1_h23 = input_test_func('doi_lg_1_h23') + moved_1_pub_lg_1_h23.group = 2 + moved_1_pub_lg_1_d23 = input_test_func('doi_lg_1_d23') + moved_1_pub_lg_1_d23.group = -1 + + moved_1_pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + moved_1_pub_lg_2_h11.group = 1 + + + moved_2_pub_cg_i = input_test_func('doi_cg_i') + moved_2_pub_cg_i.group = -1 + moved_2_pub_cg_d11 = input_test_func('doi_cg_d11') + moved_2_pub_cg_d11.group = -2 + moved_2_pub_cg_d12 = input_test_func('doi_cg_d12') + moved_2_pub_cg_d12.group = -2 + moved_2_pub_cg_d21 = input_test_func('doi_cg_d21') + moved_2_pub_cg_d21.group = -3 + moved_2_pub_cg_d22 = input_test_func('doi_cg_d22') + moved_2_pub_cg_d22.group = -3 + + moved_2_pub_lg_1_d23 = input_test_func('doi_lg_1_d23') + moved_2_pub_lg_1_d23.group = -3 + + moved_2_pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + moved_2_pub_lg_2_h21.group = 1 + moved_2_pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + moved_2_pub_lg_2_h22.group = 1 + moved_2_pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + moved_2_pub_lg_2_h11.group = 0 + moved_2_pub_lg_2_i = input_test_func('doi_lg_2_i') + moved_2_pub_lg_2_i.group = -2 + moved_2_pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + moved_2_pub_lg_2_d11.group = -2 + moved_2_pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + moved_2_pub_lg_2_d12.group = -2 + moved_2_pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + moved_2_pub_lg_2_d21.group = -3 + moved_2_pub_lg_2_d22 = input_test_func('doi_lg_2_d22') + moved_2_pub_lg_2_d22.group = -3 + moved_2_pub_lg_2_d23 = input_test_func('doi_lg_2_d23') + moved_2_pub_lg_2_d23.group = -3 + moved_2_pub_lg_2_d24 = input_test_func('doi_lg_2_d24') + moved_2_pub_lg_2_d24.group = -3 + + pubs = [pub_cg_i, pub_cg_h11, pub_cg_h21, pub_cg_h22, pub_cg_d11, pub_cg_d12, pub_cg_d21, pub_cg_d22, pub_lg_1_h23, pub_lg_1_d23, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_h11, pub_lg_2_i, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_d21, pub_lg_2_d22, pub_lg_2_d23, pub_lg_2_d24] + edges = [] + nodes, edges, handled_nodes = complete_changed_group_nodes_test(pubs, edges, 'doi_cg_d11', 2, 2, 2, 2) + self.assertCountEqual(nodes, [moved_1_pub_cg_d11, moved_1_pub_cg_d21, moved_1_pub_lg_1_d23, moved_1_pub_cg_i, moved_1_pub_lg_1_h23, moved_1_pub_cg_h11, moved_1_pub_lg_2_h11]) + self.assertCountEqual(edges, [['doi_cg_d11','doi_lg_1_d23'],['doi_cg_d11','doi_cg_d21'],['doi_cg_i','doi_cg_d11'],['doi_lg_1_h23','doi_cg_i'],['doi_cg_h11','doi_cg_i'],['doi_lg_2_h11','doi_cg_i']]) + + nodes, edges, handled_nodes = complete_changed_group_nodes_test(pubs, edges, 'doi_lg_2_h11', 2, 2, 3, 3) + self.assertCountEqual(nodes, [moved_2_pub_cg_i, moved_2_pub_cg_d11, moved_2_pub_lg_1_d23, moved_2_pub_cg_d21, moved_2_pub_cg_d12, moved_2_pub_cg_d22, moved_2_pub_lg_2_h21, moved_2_pub_lg_2_h22, moved_2_pub_lg_2_h11, moved_2_pub_lg_2_i, moved_2_pub_lg_2_d11, moved_2_pub_lg_2_d21, moved_2_pub_lg_2_d12, moved_2_pub_lg_2_d22, moved_2_pub_lg_2_d23, moved_2_pub_lg_2_d24]) + self.assertCountEqual(edges, [['doi_cg_d11','doi_lg_1_d23'],['doi_cg_d11','doi_cg_d21'],['doi_cg_i','doi_cg_d11'],['doi_cg_i','doi_cg_d12'],['doi_cg_d12','doi_cg_d22'],['doi_lg_2_h11','doi_cg_i'],['doi_cg_i','doi_lg_2_i'],['doi_lg_2_h21','doi_lg_2_h11'],['doi_lg_2_h22','doi_lg_2_h11'],['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11'],['doi_lg_2_d11','doi_lg_2_i'],['doi_lg_2_d11','doi_lg_2_d21'],['doi_lg_2_i','doi_lg_2_d12'],['doi_lg_2_d12','doi_lg_2_d22'],['doi_lg_2_d12','doi_lg_2_d23'],['doi_lg_2_d12','doi_lg_2_d24']]) def keep_only_dois(nodes): diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index d9cbf82..df28ee6 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -23,22 +23,24 @@ from verarbeitung.construct_new_graph.initialize_graph import init_graph_constru from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit -def find_furthermost_citations_test(test_nodes, changed_node, old_search_depth, cit_type): - global nodes +def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, cit_type): + global nodes, edges nodes = test_nodes + edges = test_edges - return(find_furthermost_citations(changed_node, old_search_depth, cit_type)) + return(find_furthermost_citations(nodes, edges, changed_node, old_search_depth, cit_type)) -def complete_changed_group_nodes_test(test_nodes, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height): - global nodes +def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height): + global nodes, edges nodes = test_nodes + edges = test_edges - handled_nodes = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, True) - return(nodes, handled_nodes) + handled_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, True) + return(new_nodes, new_edges, handled_nodes) -def find_furthermost_citations(node, old_search_depth, cit_type): +def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, cit_type): ''' :param node: node which is known but not from input group :type node: Publication @@ -62,9 +64,43 @@ def find_furthermost_citations(node, old_search_depth, cit_type): for new_cit in get_cit_type_list(cit_node, cit_type): for new_cit_node in nodes: if new_cit.doi_url == new_cit_node.doi_url: - new_cit_node.group -= node.group - new_citations.append(new_cit_node) + + if cit_type == "Citation": + + # to find a cyclus and not change height + not_in_citations = True + for new_cit_node_citation in new_cit_node.citations: + if (cit_node.doi_url == new_cit_node_citation.doi_url): + not_in_citations = False + break + if (not_in_citations): + new_citations.append(new_cit_node) + + # change height accordingly and add link to edge + new_cit_node.group = node.group + depth + 1 + if [cit_node.doi_url,cit_node.doi_url] not in new_edges: + new_edges.append([new_cit_node.doi_url,cit_node.doi_url]) + + elif cit_type == "Reference": + + # to find a cyclus and not change depth + not_in_citations = True + for new_cit_node_reference in new_cit_node.references: + if (new_cit_node.doi_url == new_cit_node_reference.doi_url): + not_in_citations = False + break + if (not_in_citations): + new_citations.append(new_cit_node) + + # change height accordingly and add link to edge + new_cit_node.group = node.group - depth - 1 + if [cit_node.doi_url, new_cit_node.doi_url] not in new_edges: + new_edges.append([cit_node.doi_url, new_cit_node.doi_url]) + citations_saved = new_citations + for new_citation in new_citations: + if new_citation not in new_nodes: + new_nodes.append(new_citation) # returns the references/citations which needs to be processed to complete contruction return(citations_saved) @@ -97,28 +133,50 @@ def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_he # saves which nodes were handled because they were known before handled_inserted_nodes = [] + new_nodes = [] + new_edges = [] for node in nodes: - + # moves known reference node to input and completes citations and references for this node if (node.group < 0) and (node.doi_url in inserted_nodes): - node_citations = create_global_lists_cit(nodes, edges, node, 1, new_search_height, "Citation", test_var) - add_citations(nodes, edges, node_citations, 1, new_search_height, "Citation", test_var) - old_max_references = find_furthermost_citations(node, old_search_depth, "Reference") - add_citations(nodes, edges, old_max_references, old_search_depth, new_search_depth, "Reference", test_var) node.group = 0 + new_max_citations = find_furthermost_citations(new_nodes, new_edges, node, old_search_height + abs(node.group), "Citation") + add_citations(new_nodes, new_edges, new_max_citations, old_search_height, new_search_height, "Citation", test_var) + + new_nodes, new_edges, error_doi_list_ref = init_graph_construction([node.doi_url], new_search_height, 0, test_var, True, new_nodes, new_edges) + + for err_node in error_doi_list_ref: + if err_node not in error_doi_list: + error_doi_list.append(err_node) + + + old_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, "Reference") + add_citations(new_nodes, new_edges, old_max_references, old_search_depth, new_search_depth, "Reference", test_var) handled_inserted_nodes.append(node) # moves known citation node to input and completes citations and references for this node elif (node.group > 0) and (node.doi_url in inserted_nodes): - node_references = create_global_lists_cit(nodes, edges, node, 1, new_search_depth, "Reference", test_var) - add_citations(nodes, edges, node_references, 1, new_search_depth, "Reference", test_var) - old_max_citations = find_furthermost_citations(node, old_search_height, "Citation") - add_citations(nodes, edges, old_max_citations, old_search_height, new_search_height, "Citation", test_var) - node.group = 0 + new_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth + abs(node.group), "Reference") + add_citations(new_nodes, new_edges, new_max_references, old_search_depth, new_search_depth, "Reference", test_var) + #new_nodes.append(new_max_references) + + new_nodes, new_edges, error_doi_list_ref = init_graph_construction([node.doi_url], new_search_depth, 0, test_var, True, new_nodes, new_edges) + for err_node in error_doi_list_ref: + if err_node not in error_doi_list: + error_doi_list.append(err_node) + + + old_max_citations = find_furthermost_citations(new_nodes, new_edges, node, old_search_height, "Citation") + add_citations(new_nodes, new_edges, old_max_citations, old_search_height, new_search_height, "Citation", test_var) handled_inserted_nodes.append(node) - return(handled_inserted_nodes) + for new_node in new_nodes: + for inserted_node in inserted_nodes: + if new_node.doi_url == inserted_node: + new_node.group = 0 + + return(handled_inserted_nodes, new_nodes, new_edges) def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var = False): @@ -149,20 +207,21 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes completes the references and citations for nodes which were known in non input group ''' - global nodes, edges - nodes = input_nodes_list - edges = input_edges_list + global nodes, edges, error_doi_list + nodes = input_nodes_list.copy() + edges = input_edges_list.copy() + error_doi_list = [] - handled_inserted_nodes = complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var) + handled_inserted_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var) # copy all nodes from inserted_nodes to new node, if node is not in handled_inserted_nodes not_handled_inserted_nodes = [node for node in inserted_nodes if node not in handled_inserted_nodes] - # initializes nodes/edges from input and gets a list with publication objects for citations and references returned - #references_obj_list, citations_obj_list = initialize_nodes_list(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var) - # function calls to begin recursive processing up to max depth/height - #add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var) - #add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var) + # function call to begin recursive processing up to max depth/height for unhandled nodes + nodes, edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = new_nodes, input_edges = new_edges) + for err_node in error_doi_list_new: + if err_node not in error_doi_list: + error_doi_list.append(err_node) - nodes, edges, error_doi_list = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges) + return(nodes, edges, error_doi_list) \ No newline at end of file diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index b7b89ab..416be51 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -102,16 +102,33 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list) old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var) - - if (len(inserted_nodes) > 0): - connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var) + + processed_list_copy = processed_list.copy() + valid_edges_copy = valid_edges.copy() # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges) - - - if test_var: - return(processed_list, valid_edges) - else: - return(processed_list, valid_edges, error_doi_list) + + if (len(inserted_nodes) > 0): + inserted_pub_nodes, inserted_edges, error_doi_list_new = connect_old_and_new_input(processed_list_copy, valid_edges_copy, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var) + for err_node in error_doi_list_new: + if err_node not in error_doi_list: + error_doi_list.append(err_node) + + for inserted_node in inserted_pub_nodes: + not_in_nodes = True + for node in processed_list: + if inserted_node.doi_url == node.doi_url: + processed_list.remove(node) + processed_list.append(inserted_node) + not_in_nodes = False + break + if not_in_nodes: + processed_list.append(inserted_node) + + for inserted_edge in inserted_edges: + if inserted_edge not in valid_edges: + valid_edges.append(inserted_edge) + + return(processed_list, valid_edges, error_doi_list) -- GitLab