From b1d80ea86e651be4fe0fdc1769e21e993cf745ad Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Tue, 21 Dec 2021 22:23:01 +0100 Subject: [PATCH] changed names and added tests --- .../construct_new_graph/add_citations_rec.py | 16 +++++++ .../construct_new_graph/export_to_json.py | 8 +++- .../construct_new_graph/initialize_graph.py | 16 +------ verarbeitung/new_height.json | 1 + verarbeitung/test/construct_graph_unittest.py | 47 ++++++++++++++++++- verarbeitung/test_output.json | 2 +- ...h.py => compare_old_and_new_node_lists.py} | 15 +++--- .../update_graph/delete_nodes_edges.py | 7 ++- verarbeitung/update_graph/update_depth.py | 2 +- .../{Kanten_Vergleich.py => update_edges.py} | 0 verarbeitung/update_graph/update_graph.py | 6 +-- 11 files changed, 86 insertions(+), 34 deletions(-) create mode 100644 verarbeitung/new_height.json rename verarbeitung/update_graph/{Knoten_Vergleich.py => compare_old_and_new_node_lists.py} (78%) rename verarbeitung/update_graph/{Kanten_Vergleich.py => update_edges.py} (100%) diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index 21a2c7e..87badf4 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -22,6 +22,22 @@ sys.path.append("../") from input.publication import Publication from verarbeitung.get_pub_from_input import get_pub +def create_graph_structure_citations_test(pub, search_depth, search_depth_max, cit_type, test_var, test_nodes, test_edges): + ''' + :param test_nodes: list of publications from unit test + :type test_nodes: List[Publication] + + :param test_edges: list of links from unit test + :type test_edges: List[List[String,String]] + + for unit test purposes only + ''' + global nodes, edges + nodes = test_nodes + edges = test_edges + return(nodes, edges, create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, 
test_var)) + + def get_cit_type_list(pub, cit_type): ''' :param pub: Publication which citations will be added diff --git a/verarbeitung/construct_new_graph/export_to_json.py b/verarbeitung/construct_new_graph/export_to_json.py index 68a25be..fd21dc1 100644 --- a/verarbeitung/construct_new_graph/export_to_json.py +++ b/verarbeitung/construct_new_graph/export_to_json.py @@ -79,8 +79,12 @@ def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False) dict_of_all["nodes"] = list_of_node_dicts dict_of_all["links"] = list_of_edge_dicts if (test_var): - with open('test_output.json','w') as outfile: - json.dump(dict_of_all, outfile) + if json_file != 'json_text.json': + with open(json_file,'w') as outfile: + json.dump(dict_of_all, outfile) + else: + with open('test_output.json','w') as outfile: + json.dump(dict_of_all, outfile) else: with open(json_file,'w') as outfile: json.dump(dict_of_all, outfile) diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index 89825a8..ef17df6 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -27,18 +27,6 @@ from .add_citations_rec import add_citations, create_global_lists_cit def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var): ''' - :param doi_input_list: input list of doi from UI - :type doi_input_list: List[String] - - :param search_depth_max: maximum depth to search for references - :type search_depth_max: int - - :param search_height_max: maximum height to search for citations - :type search_height_max: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - for unit test purposes only ''' global nodes, edges @@ -51,8 +39,8 @@ def complete_inner_edges_test(test_nodes, test_edges): :param test_nodes: list of publications from unit test :type test_nodes: List[Publication] - :param 
test_nodes: list of links from unit test - :type test_nodes: List[List[String,String]] + :param test_edges: list of links from unit test + :type test_edges: List[List[String,String]] for unit test purposes only ''' diff --git a/verarbeitung/new_height.json b/verarbeitung/new_height.json new file mode 100644 index 0000000..f96362a --- /dev/null +++ b/verarbeitung/new_height.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": 
["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file diff --git a/verarbeitung/test/construct_graph_unittest.py b/verarbeitung/test/construct_graph_unittest.py index 4b1dabe..757cdf4 100644 --- a/verarbeitung/test/construct_graph_unittest.py +++ b/verarbeitung/test/construct_graph_unittest.py @@ -6,7 +6,7 @@ import sys sys.path.append("../") from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction, initialize_nodes_list_test, complete_inner_edges_test -from verarbeitung.construct_new_graph.add_citations_rec import get_cit_type_list +from verarbeitung.construct_new_graph.add_citations_rec import get_cit_type_list, create_graph_structure_citations_test from verarbeitung.test.input_test import 
input_test_func class ConstructionTest(unittest.TestCase): @@ -138,8 +138,51 @@ class ConstructionTest(unittest.TestCase): self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_cits), keep_only_dois(pub_lg_1_d_12.citations)) def test_create_graph_structure_citations(self): - print("Hallo") + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') + pub_lg_1_h_11.group = 1 + pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') + pub_lg_1_h_12.group = 1 + pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') + pub_lg_1_d_11.group = -1 + pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') + pub_lg_1_d_12.group = -1 + # checks if citations/references are found and added + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 2, "Citation", True, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12],[['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11, pub_lg_1_d_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) + self.assertCountEqual(cit_list, [pub_lg_1_h_11, pub_lg_1_h_12]) + + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 2, "Reference", True, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12],[['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11, pub_lg_1_d_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) + self.assertCountEqual(cit_list, [pub_lg_1_d_11, pub_lg_1_d_12]) + + # checks if max depth/height is checked before added + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, 
"Citation", True, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12],[['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) + self.assertCountEqual(cit_list, []) + + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Reference", True, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12],[['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) + self.assertCountEqual(cit_list, []) + + # checks if max depth/height is checked before added but citation/reference from max depth/height found and added + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Citation", True, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12, pub_lg_1_h_11],[['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12, pub_lg_1_h_11]) + self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i']]) + self.assertCountEqual(cit_list, []) + + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Reference", True, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11],[['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11]) + self.assertCountEqual(return_edges, [['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i'],['doi_lg_1_i','doi_lg_1_d11']]) + self.assertCountEqual(cit_list, []) + + def keep_only_dois(nodes): ''' diff --git a/verarbeitung/test_output.json 
b/verarbeitung/test_output.json index 50d6e78..f96362a 100644 --- a/verarbeitung/test_output.json +++ b/verarbeitung/test_output.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_h02", "name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "group": "Input", "depth": 0, "citations": 1}, {"doi": "doi_h1", "name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "group": "Citedby", "depth": 1, "citations": 1}, {"doi": "doi_h2", "name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "group": "Citedby", "depth": 2, "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": 
["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph/Knoten_Vergleich.py b/verarbeitung/update_graph/compare_old_and_new_node_lists.py similarity index 78% rename from verarbeitung/update_graph/Knoten_Vergleich.py rename to verarbeitung/update_graph/compare_old_and_new_node_lists.py index 55c10f2..f816fea 100644 --- 
a/verarbeitung/update_graph/Knoten_Vergleich.py +++ b/verarbeitung/update_graph/compare_old_and_new_node_lists.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 from collections import Counter -def doi_listen_vergleichen(alte,neue): +def compare_old_and_new_node_lists(old_doi_node_list,new_doi_node_list): ''' - :param alte: list of dois from old graph - :type alte: list - :param neue: list of dois from new graph - :type neue: list + :param old_doi_node_list: list of dois from old graph + :type old_doi_node_list: List[String] + + :param new_doi_node_list: list of dois from new graph + :type new_doi_node_list: List[String] function to calculate, which nodes from the old graph are deleted and which are added ''' - dois_from_old_graph = alte #WICHTIG: Keine doppelten DOIs - dois_from_new_graph = neue + dois_from_old_graph = old_doi_node_list #WICHTIG: Keine doppelten DOIs + dois_from_new_graph = new_doi_node_list deleted_nodes = [] common_nodes = [] inserted_nodes = [] diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py index 53e826e..6960f3e 100644 --- a/verarbeitung/update_graph/delete_nodes_edges.py +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -17,7 +17,7 @@ import sys from pathlib import Path sys.path.append("../../") -from .Kanten_Vergleich import back_to_valid_edges +from .update_edges import back_to_valid_edges def search_ref_graph_rec(pub): @@ -91,6 +91,5 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): search_cit_graph_rec(pub) back_to_valid_edges(old_edges_list, usable_nodes) - input_list = usable_nodes.copy() - print(input_list) - #return(usable_nodes, valid_edges) + + return(usable_nodes, old_edges_list) diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py index 38560c7..40fc687 100644 --- a/verarbeitung/update_graph/update_depth.py +++ b/verarbeitung/update_graph/update_depth.py @@ -18,7 +18,7 @@ sys.path.append("../../") 
from verarbeitung.construct_new_graph.add_citations_rec import add_citations from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges -from .Kanten_Vergleich import back_to_valid_edges +from .update_edges import back_to_valid_edges def reduce_max_height(max_height): diff --git a/verarbeitung/update_graph/Kanten_Vergleich.py b/verarbeitung/update_graph/update_edges.py similarity index 100% rename from verarbeitung/update_graph/Kanten_Vergleich.py rename to verarbeitung/update_graph/update_edges.py diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 9a91f94..7bbb907 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -20,7 +20,7 @@ sys.path.append("../../") from input.publication import Publication from verarbeitung.get_pub_from_input import get_pub -from .Knoten_Vergleich import doi_listen_vergleichen +from .compare_old_and_new_node_lists import compare_old_and_new_node_lists from .delete_nodes_edges import delete_nodes_and_edges from .connect_new_input import connect_old_and_new_input from .update_depth import update_depth @@ -97,11 +97,11 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var) # retrieve which publications are already known, removed, inserted - common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list) + common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list) # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): - delete_nodes_and_edges(processed_list, common_nodes, valid_edges) + processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges) old_search_depth, old_search_height = update_depth(processed_list, 
valid_edges, search_depth, search_height, test_var) -- GitLab