From e7b96bb072d74dbb27de2b99cbd34eebe8f4141b Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Mon, 13 Dec 2021 17:23:42 +0100 Subject: [PATCH] changed update_graph_delete function --- verarbeitung/test/Processing_unittest.py | 2 +- .../update_graph/connect_new_input.py | 2 +- verarbeitung/update_graph/import_from_json.py | 2 +- verarbeitung/update_graph/update_graph.py | 2 +- verarbeitung/update_graph/update_graph_del.py | 104 ++++++------------ 5 files changed, 39 insertions(+), 73 deletions(-) diff --git a/verarbeitung/test/Processing_unittest.py b/verarbeitung/test/Processing_unittest.py index b17f5e4..35cade7 100644 --- a/verarbeitung/test/Processing_unittest.py +++ b/verarbeitung/test/Processing_unittest.py @@ -120,4 +120,4 @@ def keep_only_dois(nodes): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() \ No newline at end of file diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index 4f6b91f..32af1fc 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -18,7 +18,7 @@ from pathlib import Path from os import error sys.path.append("../") -from import_from_json import input_from_json +from .import_from_json import input_from_json from verarbeitung.construct_new_graph.Processing import initialize_nodes_list, complete_inner_edges from verarbeitung.construct_new_graph.add_citations_rec import add_citations from verarbeitung.construct_new_graph.export_to_json import output_to_json diff --git a/verarbeitung/update_graph/import_from_json.py b/verarbeitung/update_graph/import_from_json.py index 01a3886..f79e42b 100644 --- a/verarbeitung/update_graph/import_from_json.py +++ b/verarbeitung/update_graph/import_from_json.py @@ -16,7 +16,7 @@ __status__ = "Production" import json import sys -sys.path.append("../../") +sys.path.append("../") from input.publication import Publication, Citation diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 0571cd0..068304a 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -100,7 +100,7 @@ def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): - processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, deleted_nodes, old_doi_input_list, old_edges_list) + processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, common_nodes, old_edges_list) if (len(inserted_nodes) > 0): connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var) diff --git a/verarbeitung/update_graph/update_graph_del.py b/verarbeitung/update_graph/update_graph_del.py index eaa25d6..5879d96 100644 --- a/verarbeitung/update_graph/update_graph_del.py +++ b/verarbeitung/update_graph/update_graph_del.py @@ -20,17 +20,18 @@ sys.path.append("../../") from .Kanten_Vergleich import back_to_valid_edges -def delete_ref_nodes_rec(pub): +def search_ref_graph_rec(pub): ''' - :param pub: pub go get deleted after recursive call + :param pub: pub go get appended to usable_nodes :type pub: Publication - function that removes nodes of group "height", if they aren't reachable from input nodes + function that appends nodes of group "reference" to list usable_nodes, if they are reachable from input nodes ''' for reference in pub.references: - for ref_pub in processed_list: - if (ref_pub.doi_url == reference.doi_url): - + for ref_pub in input_obj_list: + if ((reference.doi_url == ref_pub.doi_url) and (ref_pub not in usable_nodes)): + usable_nodes.append(ref_pub) + # to find a cyclus and avoid recursion error not_in_citations = True for citation in pub.citations: @@ -38,23 +39,20 @@ def delete_ref_nodes_rec(pub): not_in_citations = False break if (not_in_citations): - delete_ref_nodes_rec(ref_pub) - - # removes publication from list after recursion and if it's not of group input - if (pub.group != 0): - processed_list.remove(pub) + search_ref_graph_rec(ref_pub) -def delete_cit_nodes_rec(pub): +def search_cit_graph_rec(pub): ''' - :param pub: publication to be removed after recursive call + :param pub: pub go get appended to usable_nodes :type pub: Publication - function that removes publications of group "depth" from input_list, if they aren't reachable from input nodes - ''' + function that appends nodes of group "citation" to list usable_nodes, if they are reachable from input nodes + ''' for citation in pub.citations: - for cit_pub in processed_list: - if (cit_pub.doi_url == citation.doi_url): + for cit_pub in input_obj_list: + if ((citation.doi_url == cit_pub.doi_url) and (cit_pub not in usable_nodes)): + usable_nodes.append(cit_pub) # to find a cyclus and avoid recursion error not_in_references = True @@ -63,68 +61,36 @@ def delete_cit_nodes_rec(pub): not_in_references = False break if (not_in_references): - delete_cit_nodes_rec(cit_pub) + search_cit_graph_rec(cit_pub) - # removes publication from list after recursion and if it's not of group input - if (pub.group != 0): - processed_list.remove(pub) -def delete_nodes_and_edges(input_list, deleted_nodes, old_doi_input_list, old_edges_list): +def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): ''' :param input_list: list of publications to get reduced :type input_list: List[Publication] - :param deleted_nodes: list of input dois which are not in new call - :type deleted_nodes: List[String] - - :param old_doi_input_list: list of input dois from old call - :type old_doi_input_list: List[String] + :param common_nodes: list of input dois which are in old and new input call + :type common_nodes: List[String] :param old_edges_list: list of links between publications from old call :type old_edges_list: List[List[String,String]] - function to start recursive node removal for references and citations and to return edge list to valid state + function to start recursive node removal for references and citations and to change edge list to valid state ''' - # global list to save the process of removing unneeded publications - global processed_list - processed_list = input_list.copy() - - for del_node in deleted_nodes: - for pub in processed_list: - if (del_node == pub.doi_url): - - # checks for every reference if it is citet more than once. If not it calls deletion function - for reference in pub.references: - only_reference = True - for ref_cit in processed_list: - if (reference == ref_cit.doi_url): - for citation in ref_cit.citations: - if ((citation in old_doi_input_list) and (citation != del_node)): - only_reference = False - break - if (only_reference == False): - break - if (only_reference): - delete_ref_nodes_rec(pub) - - # checks for every citation if it cites more than once. If not it calls deletion function - for citation in pub.citations: - only_reference = True - for cit_ref in processed_list: - if (citation == cit_ref.doi_url): - for reference in cit_ref.references: - if ((reference in old_doi_input_list) and (reference != del_node)): - only_reference = False - break - if (only_reference == False): - break - if (only_reference): - delete_cit_nodes_rec(pub) - - # removes publication of type input after start of recursive call to both directions - processed_list.remove(pub) - - valid_edges = back_to_valid_edges(old_edges_list, processed_list) - return(processed_list, valid_edges) \ No newline at end of file + global usable_nodes, input_obj_list + usable_nodes = [] + input_obj_list = input_list.copy() + + # starts for every common input node a tree-search and adds found nodes to usable_nodes + for common in common_nodes: + for pub in input_obj_list: + if (common == pub.doi_url): + usable_nodes.append(pub) + search_ref_graph_rec(pub) + search_cit_graph_rec(pub) + + valid_edges = back_to_valid_edges(old_edges_list, usable_nodes) + + return(usable_nodes, valid_edges) \ No newline at end of file -- GitLab