From 97cbb8867e6ee018b73ebeb1cd44dcb3b263d652 Mon Sep 17 00:00:00 2001 From: Donnawetter <8loeding@informatik.uni-hamburg.de> Date: Fri, 28 Jan 2022 14:15:58 +0100 Subject: [PATCH] fix comments and reformat files from verarbeitung --- verarbeitung/README.md | 29 +- verarbeitung/construct_new_graph/README.md | 1 - .../construct_new_graph/add_citations_rec.py | 85 +- .../construct_new_graph/export_to_json.py | 30 +- .../construct_new_graph/initialize_graph.py | 74 +- verarbeitung/dev_files/README.md | 2 +- verarbeitung/dev_files/print_graph_test.py | 67 +- verarbeitung/get_pub_from_input.py | 22 +- verarbeitung/process_main.py | 4 +- verarbeitung/test/README.md | 5 +- verarbeitung/test/construct_graph_unittest.py | 466 ++++++----- verarbeitung/test/input_test.py | 152 ++-- verarbeitung/test/update_graph_unittest.py | 758 +++++++++--------- verarbeitung/update_graph/README.md | 1 - .../compare_old_and_new_node_lists.py | 42 +- .../update_graph/connect_new_input.py | 109 +-- .../update_graph/delete_nodes_edges.py | 17 +- verarbeitung/update_graph/import_from_json.py | 35 +- verarbeitung/update_graph/update_depth.py | 16 +- verarbeitung/update_graph/update_edges.py | 5 +- verarbeitung/update_graph/update_graph.py | 24 +- 21 files changed, 1032 insertions(+), 912 deletions(-) diff --git a/verarbeitung/README.md b/verarbeitung/README.md index 10640cb..3cd6c7e 100644 --- a/verarbeitung/README.md +++ b/verarbeitung/README.md @@ -1,6 +1,6 @@ # Projekt CiS-Projekt 2021/22 -Processing-Package to generate theoretical graph for citations and references of given input publications. +Processing-Package to generate a theoretical graph for citations and references of given input publications. ## Usage/Examples @@ -9,53 +9,50 @@ from verarbeitung.process_main import Processing def main(url_list): - Processing(url_list) + Processing(url_list) ``` Grundlegender Prozess: -Es wird von der UI eine Liste an DOIs an die Verarbeitung übergeben und -diese wird dann umgewandelt in eine Knoten-und Kantenmenge, welche die Zitierungen darstellen. -Die Informationen über die Paper und die Zitierungen kommen von der Input Gruppe über den Aufruf -von der Funktion Publication. Die Knoten- und Kantenmengen werden in Form einer -Json Datei an den Output übergeben. +Es wird von der UI eine Liste an DOIs an die Verarbeitung übergeben und diese wird dann umgewandelt in eine Knoten-und +Kantenmenge, welche die Zitierungen darstellen. Die Informationen über die Paper und die Zitierungen kommen von der +Input Gruppe über den Aufruf von der Funktion Publication. Die Knoten- und Kantenmengen werden in Form einer Json Datei +an den Output übergeben. ## Files and functions in directory - get_pub_from_input.py: ```python def get_pub(pub_doi, test_var) ``` -- Gibt für eine DOI ein Klassenobjekt zurück, in dem alle nötigen Informationen gespeichert sind. +- Gibt für eine DOI ein Klassenobjekt zurück, in dem alle nötigen Informationen gespeichert sind. process_main.py: ```python def Processing(url_list) ``` -- Überprüft, ob bereits eine Json Datei existiert und ruft dann entweder die Funktion auf, um - einen neuen Graphen zu erstellen oder die Funktion um einen Vorhandenen zu updaten. +- Überprüft, ob bereits eine Json Datei existiert und ruft dann entweder die Funktion auf, um einen neuen Graphen zu + erstellen oder die Funktion, um einen Vorhandenen zu updaten. start.script.py: - - Wird benötigt, um die Dateien ordnerübergreifend aufzurufen. Nur fürs interne Testen der - Funktionalität - +- Wird benötigt, um die Dateien ordnerübergreifend aufzurufen. Nur fürs interne Testen der + Funktionalität <name>.json: -- sind momentan Beispiele, die an den Output übergeben werden könnten. +- sind momentan Beispiele, die an den Output übergeben werden könnten. ## Testing python -m unittest discover verarbeitung/test -v ## Authors + - Donna Löding - Alina Molkentin -- Xinyi Tang - Judith Große - Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/README.md b/verarbeitung/construct_new_graph/README.md index d73f4eb..8a8bfe1 100644 --- a/verarbeitung/construct_new_graph/README.md +++ b/verarbeitung/construct_new_graph/README.md @@ -24,6 +24,5 @@ export_to_json.py ## Authors - Donna Löding - Alina Molkentin -- Xinyi Tang - Judith Große - Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index dee665c..9467fe4 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -1,28 +1,32 @@ # -*- coding: utf-8 -*- """ -Functions to add citations recursivly for multiple ACS/Nature journals +Functions to add citations recursively for multiple ACS/Nature journals """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" +# __copyright__ = "" +# __credits__ = ["", "", "", ""] +# __license__ = "" +# __version__ = "" +# __maintainer__ = "" -import sys + +import sys from pathlib import Path from os import error + sys.path.append("../") from input.publication import Publication from verarbeitung.get_pub_from_input import get_pub -def create_graph_structure_citations_test(pub, search_depth, search_depth_max, cit_type, test_var, test_nodes, test_edges): + +def create_graph_structure_citations_test(pub, search_depth, search_depth_max, cit_type, test_var, test_nodes, + test_edges): ''' :param test_nodes: list of publications from unit test :type test_nodes: List[Publication] @@ -35,7 +39,7 @@ def create_graph_structure_citations_test(pub, search_depth, search_depth_max, c global nodes, edges nodes = test_nodes edges = test_edges - return(nodes, edges, create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)) + return (nodes, edges, create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)) def get_cit_type_list(pub, cit_type): @@ -43,17 +47,18 @@ def get_cit_type_list(pub, cit_type): :param pub: Publication which citations will be added :type pub: Publication - :param cit_type: variable to differenciate citation and reference call + :param cit_type: variable to differentiate citation and reference call :type cit_type: String function to return citation or reference list for given pub ''' if cit_type == "Citation": - return(pub.citations) + return (pub.citations) elif cit_type == "Reference": - return(pub.references) + return (pub.references) else: - return(ValueError) + return (ValueError) + def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var): ''' @@ -72,10 +77,10 @@ def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_ :param search_depth_max: maximum depth to search for citations :type search_depth_max: int - :param cit_type: variable to differenciate citation and reference call + :param cit_type: variable to differentiate citation and reference call :type cit_type: String - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean function to create nodes and edges and call create_graph_structure_citations @@ -99,10 +104,10 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty :param search_depth_max: maximum depth to search for citations :type search_depth_max: int - :param cit_type: variable to differenciate citation and reference call + :param cit_type: variable to differentiate citation and reference call :type cit_type: String - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean adds a node for every citing publication unknown @@ -112,38 +117,38 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty citations_pub_obj_list = [] for citation in get_cit_type_list(pub, cit_type): not_in_nodes = True - for node in nodes: # checks every citation for duplication + for node in nodes: # checks every citation for duplication if (citation.doi_url == node.doi_url): not_in_nodes = False break if (not_in_nodes): - if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly + if (search_depth < search_depth_max): # checks if its a test and chooses input function accordingly citation_pub_obj = get_pub(citation.doi_url, test_var) if (type(citation_pub_obj) != Publication): - #print(pub) - continue - + # print(pub) + continue + if (cit_type == "Citation"): citation_pub_obj.group = search_depth + 1 - edges.append([citation_pub_obj.doi_url,pub.doi_url]) + edges.append([citation_pub_obj.doi_url, pub.doi_url]) else: citation_pub_obj.group = -(search_depth + 1) - edges.append([pub.doi_url,citation_pub_obj.doi_url]) - nodes.append(citation_pub_obj) + edges.append([pub.doi_url, citation_pub_obj.doi_url]) + nodes.append(citation_pub_obj) citations_pub_obj_list.append(citation_pub_obj) # adds just the edge if citation already exists else: - if (cit_type == "Citation"): - if ([citation.doi_url,pub.doi_url] not in edges): - edges.append([citation.doi_url,pub.doi_url]) + if (cit_type == "Citation"): + if ([citation.doi_url, pub.doi_url] not in edges): + edges.append([citation.doi_url, pub.doi_url]) else: - if ([pub.doi_url,citation.doi_url] not in edges): - edges.append([pub.doi_url,citation.doi_url]) + if ([pub.doi_url, citation.doi_url] not in edges): + edges.append([pub.doi_url, citation.doi_url]) return citations_pub_obj_list -def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var): +def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var): ''' :param citations_pub_obj_list: list of publications which citations will be added :type citations_pub_obj_list: List[Publication] @@ -154,10 +159,10 @@ def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max :param search_depth_max: maximum depth to search for citations :type search_depth_max: int - :param cit_type: variable to differenciate citation and reference call + :param cit_type: variable to differentiate citation and reference call :type cit_type: String - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean recursive function to implement depth-first-search on citations @@ -167,13 +172,14 @@ def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max new_citation_pub_obj_save_list = [] for pub in citations_pub_obj_list: - new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var) + new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, + test_var) if len(new_citation_pub_obj_list) > 0: new_citation_pub_obj_save_list += new_citation_pub_obj_list - # If the maximum depth has not yet been reached, calls function recursivly with increased depth + # If the maximum depth has not yet been reached, calls function recursively with increased depth if (search_depth < search_depth_max): - process_citations_rec(new_citation_pub_obj_save_list, search_depth+1, search_depth_max, cit_type, test_var) + process_citations_rec(new_citation_pub_obj_save_list, search_depth + 1, search_depth_max, cit_type, test_var) def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var): @@ -193,10 +199,10 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth :param search_depth_max: maximum depth to search for citations :type search_depth_max: int - :param cit_type: variable to differenciate citation and reference call + :param cit_type: variable to differentiate citation and reference call :type cit_type: String - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean function to call recursive depth-first-search of citations @@ -206,4 +212,3 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth edges = input_edges process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var) - #return(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/export_to_json.py b/verarbeitung/construct_new_graph/export_to_json.py index d093f63..00627df 100644 --- a/verarbeitung/construct_new_graph/export_to_json.py +++ b/verarbeitung/construct_new_graph/export_to_json.py @@ -4,14 +4,15 @@ Functions that format the computed graph to match the interface to the output-pa """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" + +# __copyright__ = "" +# __credits__ = ["", "", "", ""] +# __license__ = "" +# __version__ = "" +# __maintainer__ = "" import json @@ -42,8 +43,9 @@ def format_nodes(nodes): new_dict["citations"] = len(node.citations) list_of_node_dicts.append(new_dict) return list_of_node_dicts - -# creates a list that contains a disctionary for each edge + + +# creates a list that contains a dictionary for each edge # the dictionaries contain the source as keys and the target as values def format_edges(edges): ''' @@ -59,9 +61,9 @@ def format_edges(edges): new_dict_2["target"] = edge[1] list_of_edge_dicts.append(new_dict_2) return list_of_edge_dicts - -def output_to_json(nodes, edges, search_depth, search_height, json_file = 'json_text.json', test_var = False): + +def output_to_json(nodes, edges, search_depth, search_height, json_file='json_text.json', test_var=False): ''' :param nodes: list of publications to export to json :type nodes: List[Publication] @@ -69,7 +71,7 @@ def output_to_json(nodes, edges, search_depth, search_height, json_file = 'json_ :param edges: list of links to export to json :type edges: List[String,String] - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean function to export nodes and links as a dictionary to json file @@ -82,11 +84,11 @@ def output_to_json(nodes, edges, search_depth, search_height, json_file = 'json_ dict_of_all["depth_height"] = [search_depth, search_height] if (test_var): if json_file != 'json_text.json': - with open(json_file,'w') as outfile: + with open(json_file, 'w') as outfile: json.dump(dict_of_all, outfile) else: - with open('test_output.json','w') as outfile: + with open('test_output.json', 'w') as outfile: json.dump(dict_of_all, outfile) else: - with open(json_file,'w') as outfile: + with open(json_file, 'w') as outfile: json.dump(dict_of_all, outfile) diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index 4615d5e..81571db 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -4,20 +4,22 @@ Functions to generate a graph representing citations between multiple ACS/Nature """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" +# __copyright__ = "" +# __credits__ = ["", "", "", ""] +# __license__ = "" +# __version__ = "" +# __maintainer__ = "" -import sys + +import sys import gc from pathlib import Path from os import error + sys.path.append("../") from input.publication import Publication @@ -25,6 +27,7 @@ from verarbeitung.get_pub_from_input import get_pub from .export_to_json import output_to_json from .add_citations_rec import add_citations, create_global_lists_cit + def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var): ''' for unit test purposes only @@ -32,7 +35,8 @@ def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_m global nodes, edges nodes = [] edges = [] - return(initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var)) + return (initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var)) + def complete_inner_edges_test(test_nodes, test_edges): ''' @@ -44,12 +48,13 @@ def complete_inner_edges_test(test_nodes, test_edges): for unit test purposes only ''' - + global nodes, edges nodes = test_nodes edges = test_edges complete_inner_edges() - return(nodes, edges) + return (nodes, edges) + def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): ''' @@ -62,36 +67,36 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t :param search_height_max: maximum height to search for citations :type search_height_max: int - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean - adds input dois to nodes and retrieves citations and references for input publications + adds input DOIs to nodes and retrieves citations and references for input publications ''' # saves found citations and references in lists references_pub_obj_list = [] citations_pub_obj_list = [] - for pub_doi in doi_input_list: #iterates over every incoming doi + for pub_doi in doi_input_list: # iterates over every incoming doi pub = get_pub(pub_doi, test_var) if (type(pub) != Publication): - #print(pub) + # print(pub) error_doi_list.append(pub_doi) - continue + continue # checks if publication already exists in nodes - not_in_nodes = True #boolean value to check if a node already exists in the set of nodes + not_in_nodes = True # boolean value to check if a node already exists in the set of nodes - for node in nodes: #iterates over every node in the set of nodes - if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set - not_in_nodes = False #false --> node will not be created + for node in nodes: # iterates over every node in the set of nodes + if (pub.doi_url == node.doi_url): # determines that a node with this DOI already is in the set + not_in_nodes = False # false --> node will not be created node.group = 0 break - if (not_in_nodes): #there is no node with this doi in the set - nodes.append(pub) #appends Publication Object + if (not_in_nodes): # there is no node with this DOI in the set + nodes.append(pub) # appends Publication Object pub.group = 0 else: - doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list + doi_input_list.remove(pub_doi) # deletes the DOI-duplicate from input list # inserts references as publication objects into list and # inserts first depth references into nodes/edges if maximum search depth > 0 @@ -103,11 +108,10 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var): citations_pub_obj_list.append(citation) - return(references_pub_obj_list, citations_pub_obj_list) - + return (references_pub_obj_list, citations_pub_obj_list) -def complete_inner_edges(update_var = False, input_nodes = [], input_edges = []): +def complete_inner_edges(update_var=False, input_nodes=[], input_edges=[]): ''' :param update_var: variable to check if call is from update_graph with known nodes and edges or fresh construction :type update_var: boolean @@ -122,7 +126,7 @@ def complete_inner_edges(update_var = False, input_nodes = [], input_edges = []) ''' if update_var: global nodes, edges - + nodes = input_nodes edges = input_edges @@ -136,10 +140,11 @@ def complete_inner_edges(update_var = False, input_nodes = [], input_edges = []) for reference in node.references: for pub in nodes: if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)): - edges.append([node.doi_url,reference.doi_url]) + edges.append([node.doi_url, reference.doi_url]) -def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, update_var = False, input_nodes = [], input_edges = []): +def init_graph_construction(doi_input_list, search_depth, search_height, test_var=False, update_var=False, + input_nodes=[], input_edges=[]): ''' :param doi_input_list: input list of doi from UI :type doi_input_list: List[String] @@ -150,7 +155,7 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va :param search_depth: maximum depth to search for references :type search_depth: int - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean :param update_var: variable to check if call is from update_graph with known nodes and edges or fresh construction @@ -175,21 +180,21 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va # ERROR- if a negative number is entered for depth if (search_depth < 0): - print("Error, search_depth of search must be positive") + print("Error, search_depth of search must be positive") - # creates empty lists to save nodes and edges global nodes, edges, error_doi_list if update_var: nodes = input_nodes - edges = input_edges + edges = input_edges else: nodes = [] edges = [] error_doi_list = [] # initializes nodes/edges from input and gets a list with publication objects for citations and references returned - references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) + references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list, search_depth, search_height, + test_var) # function calls to begin recursive processing up to max depth/height add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var) @@ -205,5 +210,4 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va del edges gc.collect() - - return(new_nodes, new_edges, error_doi_list) + return (new_nodes, new_edges, error_doi_list) diff --git a/verarbeitung/dev_files/README.md b/verarbeitung/dev_files/README.md index 53ccd02..f42d3f9 100644 --- a/verarbeitung/dev_files/README.md +++ b/verarbeitung/dev_files/README.md @@ -1 +1 @@ -Dieser Ordner ist nur für uns intern, um Testläufe mir echten DOIs zu starten. \ No newline at end of file +Dieser Ordner ist nur für uns intern, um Testläufe mit echten DOIs zu starten. \ No newline at end of file diff --git a/verarbeitung/dev_files/print_graph_test.py b/verarbeitung/dev_files/print_graph_test.py index b45e909..9fa8441 100644 --- a/verarbeitung/dev_files/print_graph_test.py +++ b/verarbeitung/dev_files/print_graph_test.py @@ -4,24 +4,26 @@ Functions to test and print the nodes and edges sets """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" + +# __copyright__ = "" +# __credits__ = ["", "", "", ""] +# __license__ = "" +# __version__ = "" +# __maintainer__ = "" import sys -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +# sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') sys.path.append("../../") from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction from verarbeitung.update_graph.import_from_json import input_from_json from verarbeitung.update_graph.update_graph import update_graph + # a function to print nodes and edges from a graph def print_graph(nodes, edges): print("Knoten:\n") @@ -29,11 +31,12 @@ def print_graph(nodes, edges): print(node.title, "\n") print("\nKanten:\n") for edge in edges: - print(edge,"\n") + print(edge, "\n") print(len(nodes)) print(len(edges)) print(" ") + def print_extended_graph(nodes, edges): print("Knoten:\n") for node in nodes: @@ -45,11 +48,12 @@ def print_extended_graph(nodes, edges): print(citation.doi_url) print("\nKanten:\n") for edge in edges: - print(edge,"\n") + print(edge, "\n") print(len(nodes)) print(len(edges)) print(" ") + def print_simple(nodes, edges): # for node in nodes: # print(node) @@ -59,36 +63,37 @@ def print_simple(nodes, edges): print(len(edges)) print(" ") -# program test with some random dois + +# program test with some random DOIs def try_known_publications(): doi_list = [] doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') + # doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') - #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + # arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') - #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') + # arr.append('https://doi.org/10.1021/acs.jcim.0c00741') + + # arr.append('https://doi.org/10.1021/ci700007b') + # doi_list.append('https://doi.org/10.1021/acs.jcim.5b00292') - #arr.append('https://doi.org/10.1021/ci700007b') - #doi_list.append('https://doi.org/10.1021/acs.jcim.5b00292') - - #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.0c00675') - #url = sys.argv[1] - #arr.append[url] + # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.0c00675') + # url = sys.argv[1] + # arr.append[url] + nodes, edges = init_graph_construction(doi_list, 2, 2) - nodes, edges = init_graph_construction(doi_list,2,2) + print_graph(nodes, edges) - print_graph(nodes, edges) + return (nodes, edges) - return(nodes, edges) def try_delete_nodes(): doi_list = [] doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') - nodes, edges = init_graph_construction(doi_list,1,1) - #print_simple(nodes, edges) + # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + nodes, edges = init_graph_construction(doi_list, 1, 1) + # print_simple(nodes, edges) # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') # doi_list = [] @@ -96,13 +101,15 @@ def try_delete_nodes(): # valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py) # print_simple(valid_nodes, valid_edges) + def try_import(): nodes, edges = input_from_json('json_text.json') - print_extended_graph(nodes,edges) + print_extended_graph(nodes, edges) + -#nodes, edges = try_known_publications() -#nodes_new, edges_new = input_from_json("json_text.json") -#print_graph(nodes_new, edges_new) +# nodes, edges = try_known_publications() +# nodes_new, edges_new = input_from_json("json_text.json") +# print_graph(nodes_new, edges_new) try_delete_nodes() -#try_import() \ No newline at end of file +# try_import() diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py index 6bdce3b..f27b72d 100644 --- a/verarbeitung/get_pub_from_input.py +++ b/verarbeitung/get_pub_from_input.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ -A function to return an object of Type Publication for a given doi +A function to return an object of Type Publication for a given DOI """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" #__copyright__ = "" @@ -14,7 +14,7 @@ __status__ = "Production" #__maintainer__ = "" -import sys +import sys from pathlib import Path sys.path.append("../") @@ -27,20 +27,20 @@ def get_pub(pub_doi, test_var): :param pub_doi: input doi to get Publication object for :type pub_doi: String - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean - function to return an object of type Publication for given input doi depending on whether its a test or url doi + function to return an object of type Publication for given input doi depending on whether its a test or url DOI ''' - #checks if it's a test and chooses appropiate function - if(test_var): - pub = input_test_func(pub_doi) + # checks if it's a test and chooses appropriate function + if(test_var): + pub = input_test_func(pub_doi) - #checks that it isnt a test and chooses standart-input function - else: + # checks that it isn't a test and chooses standard-input function + else: inter = Input() try: - pub = inter.get_publication(pub_doi) #creates an object of class Publication) + pub = inter.get_publication(pub_doi) # creates an object of class Publication) except ValueError: return(ValueError) except IndexError: diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py index 581c426..2567c24 100644 --- a/verarbeitung/process_main.py +++ b/verarbeitung/process_main.py @@ -4,7 +4,7 @@ main function to call to generate a graph representing citations between multipl """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" #__copyright__ = "" @@ -24,6 +24,7 @@ from verarbeitung.construct_new_graph.export_to_json import output_to_json from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction from verarbeitung.update_graph.update_graph import update_graph + def Processing(url_list, search_depth, search_height, json_file = 'json_text.json'): ''' :param url_list: list of urls to construct publication graph for @@ -53,4 +54,3 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso output_to_json(nodes, edges, search_depth, search_height, json_file) return error_doi_list - \ No newline at end of file diff --git a/verarbeitung/test/README.md b/verarbeitung/test/README.md index 91a9193..f457927 100644 --- a/verarbeitung/test/README.md +++ b/verarbeitung/test/README.md @@ -11,15 +11,14 @@ input_test.py construct_graph_unittest.py -- Führt diverse Tests zur Konstruktion des Graphen ohne Vorkenntnisse mit eigenen Beispielen und unserer Input_test Funktion aus. +- Führt diverse Tests zur Konstruktion des Graphen ohne Vorkenntnisse mit eigenen Beispielen und unserer Input_test Funktion aus. update_graph_unittest.py -- Führt diverse Tests zum Updaten eines alten Graphs mit aktualisierter Input Liste mit eigenen Beispielen und unserer Input_test Funktion aus. +- Führt diverse Tests zum Updaten eines alten Graphen mit aktualisierter Input Liste mit eigenen Beispielen und unserer Input_test Funktion aus. ## Authors - Donna Löding - Alina Molkentin -- Xinyi Tang - Judith Große - Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/test/construct_graph_unittest.py b/verarbeitung/test/construct_graph_unittest.py index d188727..1cef0dd 100644 --- a/verarbeitung/test/construct_graph_unittest.py +++ b/verarbeitung/test/construct_graph_unittest.py @@ -1,245 +1,279 @@ import unittest -import sys +import sys sys.path.append("../") -from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction, initialize_nodes_list_test, complete_inner_edges_test +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction, initialize_nodes_list_test, \ + complete_inner_edges_test from verarbeitung.construct_new_graph.add_citations_rec import get_cit_type_list, create_graph_structure_citations_test from verarbeitung.construct_new_graph.export_to_json import format_nodes, format_edges from verarbeitung.get_pub_from_input import input_test_func + class ConstructionTest(unittest.TestCase): - maxDiff = None + maxDiff = None + + def testCycle(self): + nodes, edges, err_list = init_graph_construction(['doiz1'], 1, 1, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) + nodes, edges, err_list = init_graph_construction(['doiz1'], 2, 2, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) - def testCycle(self): - nodes, edges, err_list = init_graph_construction(['doiz1'],1,1,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) + def testEmptyDepthHeight(self): + nodes, edges, err_list = init_graph_construction(['doi1'], 0, 0, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1']) + self.assertCountEqual(edges, []) - nodes, edges, err_list = init_graph_construction(['doiz1'],2,2,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) + nodes, edges, err_list = init_graph_construction(['doi1', 'doi2'], 0, 0, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1', 'doi2']) + self.assertCountEqual(edges, [['doi1', 'doi2']]) + nodes, edges, err_list = init_graph_construction(['doi1', 'doi2', 'doi3'], 0, 0, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1', 'doi2', 'doi3']) + self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - def testEmptyDepthHeight(self): - nodes, edges, err_list = init_graph_construction(['doi1'],0,0,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi1']) - self.assertCountEqual(edges, []) + def testInnerEdges(self): + nodes, edges, err_list = init_graph_construction(['doi_ie1'], 1, 1, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi_ie1', 'doi_ie2', 'doi_ie3']) + self.assertCountEqual(edges, [['doi_ie1', 'doi_ie2'], ['doi_ie3', 'doi_ie1'], ['doi_ie3', 'doi_ie2']]) - nodes, edges, err_list = init_graph_construction(['doi1', 'doi2'],0,0,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doi1','doi2']) - self.assertCountEqual(edges, [['doi1', 'doi2']]) + def testRightHeight(self): + nodes, edges, err_list = init_graph_construction(['doi_h01'], 0, 1, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi_h01']) + self.assertCountEqual(edges, []) - nodes, edges, err_list = init_graph_construction(['doi1', 'doi2', 'doi3'],0,0,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) - self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) + nodes, edges, err_list = init_graph_construction(['doi_h02'], 0, 1, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi_h02', 'doi_h1']) + self.assertCountEqual(edges, [['doi_h1', 'doi_h02']]) + nodes, edges, err_list = init_graph_construction(['doi_h02'], 0, 2, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi_h02', 'doi_h1', 'doi_h2']) + self.assertCountEqual(edges, [['doi_h1', 'doi_h02'], ['doi_h2', 'doi_h1']]) - def testInnerEdges(self): - nodes, edges, err_list = init_graph_construction(['doi_ie1'],1,1,True,False) + def testRightDepth(self): + nodes, edges, err_list = init_graph_construction(['doi_d01'], 1, 0, True, False) doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) - self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) - - def testRightHeight(self): - nodes, edges, err_list = init_graph_construction(['doi_h01'],0,1,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_h01']) - self.assertCountEqual(edges, []) - - nodes, edges, err_list = init_graph_construction(['doi_h02'],0,1,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) - self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - - nodes, edges, err_list = init_graph_construction(['doi_h02'],0,2,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) - self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) - - def testRightDepth(self): - nodes, edges, err_list = init_graph_construction(['doi_d01'],1,0,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_d01']) - self.assertCountEqual(edges, []) - - nodes, edges, err_list = init_graph_construction(['doi_d02'],1,0,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) - self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - - nodes, edges, err_list = init_graph_construction(['doi_d02'],2,0,True,False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) - self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) - - def test_incorrect_input_dois(self): - nodes, edges, err_list = init_graph_construction(['doi1ic', 'doi2ic'],1,1, True, False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, []) - self.assertCountEqual(edges, []) - self.assertCountEqual(err_list, ['doi1ic', 'doi2ic']) - - nodes, edges, err_list = init_graph_construction(['doi1ic', 'doi2ic'],2,2, True, False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, []) - self.assertCountEqual(edges, []) - self.assertCountEqual(err_list, ['doi1ic', 'doi2ic']) - - nodes, edges, err_list = init_graph_construction(['doi1', 'doi2ic'],1,1, True, False) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doi1', 'doi2', 'doi3']) - self.assertCountEqual(edges, [['doi1', 'doi2'], ['doi3', 'doi1']]) - self.assertCountEqual(err_list, ['doi2ic']) - - - ## Ab hier die Tests für die einzelnen Funktionen ## - - # initialize_graph.py: - - def test_initialize_nodes_list(self): - references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i','doi_lg_2_i'], 0, 0, True) - doi_references = keep_only_dois(references_pub_obj_list) - doi_citations = keep_only_dois(citations_pub_obj_list) - self.assertCountEqual(doi_references, []) - self.assertCountEqual(doi_citations, []) - - references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i','doi_lg_2_i'], 1, 1, True) - doi_references = keep_only_dois(references_pub_obj_list) - doi_citations = keep_only_dois(citations_pub_obj_list) - self.assertCountEqual(doi_references, ['doi_lg_1_d11','doi_lg_1_d12','doi_lg_2_d11','doi_lg_2_d12']) - self.assertCountEqual(doi_citations, ['doi_lg_1_h11','doi_lg_1_h12','doi_cg_i','doi_lg_2_h11','doi_lg_2_h12']) - - def test_complete_inner_edges(self): - pub_lg_1_i = input_test_func('doi_lg_1_i') - pub_lg_1_i.group = 0 - pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') - pub_lg_1_h_12.group = 1 - pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') - pub_lg_1_d_12.group = -1 - nodes = [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12] - edges = [['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h12','doi_lg_1_i']] - processed_nodes, processed_edges = complete_inner_edges_test(nodes, edges) - self.assertCountEqual(processed_nodes, [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12]) - self.assertCountEqual(processed_edges, [['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h12','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_d12']]) - - # add_citations_rec.py: - - def test_get_type_list(self): - pub_lg_1_i = input_test_func('doi_lg_1_i') - pub_lg_1_i.group = 0 - self.assertEqual(get_cit_type_list(pub_lg_1_i, "Hallo"), ValueError) - - pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') - pub_lg_1_h_12.group = 1 - pub_lg_1_h_12_refs = get_cit_type_list(pub_lg_1_h_12, "Reference") - pub_lg_1_h_12_cits = get_cit_type_list(pub_lg_1_h_12, "Citation") - self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_refs), keep_only_dois(pub_lg_1_h_12.references)) - self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_cits), keep_only_dois(pub_lg_1_h_12.citations)) - - pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') - pub_lg_1_d_12.group = -1 - pub_lg_1_d_12_refs = get_cit_type_list(pub_lg_1_d_12, "Reference") - pub_lg_1_d_12_cits = get_cit_type_list(pub_lg_1_d_12, "Citation") - self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_refs), keep_only_dois(pub_lg_1_d_12.references)) - self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_cits), keep_only_dois(pub_lg_1_d_12.citations)) - - def test_create_graph_structure_citations(self): - pub_lg_1_i = input_test_func('doi_lg_1_i') - pub_lg_1_i.group = 0 - pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') - pub_lg_1_h_11.group = 1 - pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') - pub_lg_1_h_12.group = 1 - pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') - pub_lg_1_d_11.group = -1 - pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') - pub_lg_1_d_12.group = -1 - - # checks if citations/references are found and added - return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 2, "Citation", True, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12],[['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) - self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11, pub_lg_1_d_12]) - self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - self.assertCountEqual(cit_list, [pub_lg_1_h_11, pub_lg_1_h_12]) - - return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 2, "Reference", True, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12],[['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11, pub_lg_1_d_12]) - self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - self.assertCountEqual(cit_list, [pub_lg_1_d_11, pub_lg_1_d_12]) - - # checks if max depth/height is checked before added - return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Citation", True, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12],[['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) - self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12]) - self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) - self.assertCountEqual(cit_list, []) - - return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Reference", True, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12],[['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12]) - self.assertCountEqual(return_edges, [['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - self.assertCountEqual(cit_list, []) - - # checks if max depth/height is checked before added but citation/reference from max depth/height found and added - return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Citation", True, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12, pub_lg_1_h_11],[['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12']]) - self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12, pub_lg_1_h_11]) - self.assertCountEqual(return_edges, [['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i']]) - self.assertCountEqual(cit_list, []) - - return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Reference", True, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11],[['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11]) - self.assertCountEqual(return_edges, [['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i'],['doi_lg_1_i','doi_lg_1_d11']]) - self.assertCountEqual(cit_list, []) - - - - ## export_to_json.py: - - def test_format_nodes(self): - pub_lg_1_i = input_test_func('doi_lg_1_i') - pub_lg_1_i.group = 0 - pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') - pub_lg_1_h_11.group = 1 - pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') - pub_lg_1_d_11.group = -1 - - return_list_of_node_dicts = format_nodes([pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_d_11]) - check_list_of_node_dicts = [ {"doi": 'doi_lg_1_i', "name": 'title_lg_1_i', "author": ['contributor_lg_1_i'], "year": 'date_lg_1_i', "journal": 'journal_lg_1_i', "abstract": None, "group": 'Input', "depth": 0, "citations": 2}, - {"doi": 'doi_lg_1_h11', "name": 'title_lg_1_h11', "author": ['contributor_lg_1_h11'], "year": 'date_lg_1_h11', "journal": 'journal_lg_1_h11', "abstract": None, "group": 'Citedby', "depth": 1, "citations": 2}, - {"doi": 'doi_lg_1_d11', "name": 'title_lg_1_d11', "author": ['contributor_lg_1_d11'], "year": 'date_lg_1_d11', "journal": 'journal_lg_1_d11', "abstract": None, "group": 'Reference', "depth": -1, "citations": 1}] - - self.assertCountEqual(return_list_of_node_dicts, check_list_of_node_dicts) - - - def test_format_edges(self): - return_list_of_edges = format_edges([['doi_lg_1_i','doi_lg_1_d11'],['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h11','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_i']]) - check_list_of_edges = [{"source": 'doi_lg_1_i', "target": 'doi_lg_1_d11'},{"source": 'doi_lg_1_i', "target": 'doi_lg_1_d12'}, - {"source": 'doi_lg_1_h11', "target": 'doi_lg_1_i'},{"source": 'doi_lg_1_h12', "target": 'doi_lg_1_i'}] - - self.assertCountEqual(return_list_of_edges, check_list_of_edges) - - - - - + self.assertCountEqual(doi_nodes, ['doi_d01']) + self.assertCountEqual(edges, []) + + nodes, edges, err_list = init_graph_construction(['doi_d02'], 1, 0, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi_d02', 'doi_d1']) + self.assertCountEqual(edges, [['doi_d02', 'doi_d1']]) + + nodes, edges, err_list = init_graph_construction(['doi_d02'], 2, 0, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi_d02', 'doi_d1', 'doi_d2']) + self.assertCountEqual(edges, [['doi_d02', 'doi_d1'], ['doi_d1', 'doi_d2']]) + + def test_incorrect_input_dois(self): + nodes, edges, err_list = init_graph_construction(['doi1ic', 'doi2ic'], 1, 1, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, []) + self.assertCountEqual(edges, []) + self.assertCountEqual(err_list, ['doi1ic', 'doi2ic']) + + nodes, edges, err_list = init_graph_construction(['doi1ic', 'doi2ic'], 2, 2, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, []) + self.assertCountEqual(edges, []) + self.assertCountEqual(err_list, ['doi1ic', 'doi2ic']) + + nodes, edges, err_list = init_graph_construction(['doi1', 'doi2ic'], 1, 1, True, False) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1', 'doi2', 'doi3']) + self.assertCountEqual(edges, [['doi1', 'doi2'], ['doi3', 'doi1']]) + self.assertCountEqual(err_list, ['doi2ic']) + + ## From here the tests for the individual functions ## + + # initialize_graph.py: + + def test_initialize_nodes_list(self): + references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i', 'doi_lg_2_i'], 0, 0, + True) + doi_references = keep_only_dois(references_pub_obj_list) + doi_citations = keep_only_dois(citations_pub_obj_list) + self.assertCountEqual(doi_references, []) + self.assertCountEqual(doi_citations, []) + + references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i', 'doi_lg_2_i'], 1, 1, + True) + doi_references = keep_only_dois(references_pub_obj_list) + doi_citations = keep_only_dois(citations_pub_obj_list) + self.assertCountEqual(doi_references, ['doi_lg_1_d11', 'doi_lg_1_d12', 'doi_lg_2_d11', 'doi_lg_2_d12']) + self.assertCountEqual(doi_citations, + ['doi_lg_1_h11', 'doi_lg_1_h12', 'doi_cg_i', 'doi_lg_2_h11', 'doi_lg_2_h12']) + + def test_complete_inner_edges(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') + pub_lg_1_h_12.group = 1 + pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') + pub_lg_1_d_12.group = -1 + nodes = [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12] + edges = [['doi_lg_1_i', 'doi_lg_1_d12'], ['doi_lg_1_h12', 'doi_lg_1_i']] + processed_nodes, processed_edges = complete_inner_edges_test(nodes, edges) + self.assertCountEqual(processed_nodes, [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12]) + self.assertCountEqual(processed_edges, [['doi_lg_1_i', 'doi_lg_1_d12'], ['doi_lg_1_h12', 'doi_lg_1_i'], + ['doi_lg_1_h12', 'doi_lg_1_d12']]) + + # add_citations_rec.py: + + def test_get_type_list(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + self.assertEqual(get_cit_type_list(pub_lg_1_i, "Hallo"), ValueError) + + pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') + pub_lg_1_h_12.group = 1 + pub_lg_1_h_12_refs = get_cit_type_list(pub_lg_1_h_12, "Reference") + pub_lg_1_h_12_cits = get_cit_type_list(pub_lg_1_h_12, "Citation") + self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_refs), keep_only_dois(pub_lg_1_h_12.references)) + self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_cits), keep_only_dois(pub_lg_1_h_12.citations)) + + pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') + pub_lg_1_d_12.group = -1 + pub_lg_1_d_12_refs = get_cit_type_list(pub_lg_1_d_12, "Reference") + pub_lg_1_d_12_cits = get_cit_type_list(pub_lg_1_d_12, "Citation") + self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_refs), keep_only_dois(pub_lg_1_d_12.references)) + self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_cits), keep_only_dois(pub_lg_1_d_12.citations)) + + def test_create_graph_structure_citations(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') + pub_lg_1_h_11.group = 1 + pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') + pub_lg_1_h_12.group = 1 + pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') + pub_lg_1_d_11.group = -1 + pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') + pub_lg_1_d_12.group = -1 + + # checks if citations/references are found and added + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 2, "Citation", True, + [pub_lg_1_i, pub_lg_1_d_11, + pub_lg_1_d_12], + [['doi_lg_1_i', 'doi_lg_1_d11'], + ['doi_lg_1_i', 'doi_lg_1_d12']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11, pub_lg_1_d_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_i', 'doi_lg_1_d11'], ['doi_lg_1_i', 'doi_lg_1_d12'], + ['doi_lg_1_h11', 'doi_lg_1_i'], ['doi_lg_1_h12', 'doi_lg_1_i']]) + self.assertCountEqual(cit_list, [pub_lg_1_h_11, pub_lg_1_h_12]) + + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 2, "Reference", + True, [pub_lg_1_i, pub_lg_1_h_11, + pub_lg_1_h_12], + [['doi_lg_1_h11', 'doi_lg_1_i'], + ['doi_lg_1_h12', 'doi_lg_1_i']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11, pub_lg_1_d_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_i', 'doi_lg_1_d11'], ['doi_lg_1_i', 'doi_lg_1_d12'], + ['doi_lg_1_h11', 'doi_lg_1_i'], ['doi_lg_1_h12', 'doi_lg_1_i']]) + self.assertCountEqual(cit_list, [pub_lg_1_d_11, pub_lg_1_d_12]) + + # checks if max depth/height is checked before added + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Citation", True, + [pub_lg_1_i, pub_lg_1_d_11, + pub_lg_1_d_12], + [['doi_lg_1_i', 'doi_lg_1_d11'], + ['doi_lg_1_i', 'doi_lg_1_d12']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_i', 'doi_lg_1_d11'], ['doi_lg_1_i', 'doi_lg_1_d12']]) + self.assertCountEqual(cit_list, []) + + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Reference", + True, [pub_lg_1_i, pub_lg_1_h_11, + pub_lg_1_h_12], + [['doi_lg_1_h11', 'doi_lg_1_i'], + ['doi_lg_1_h12', 'doi_lg_1_i']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12]) + self.assertCountEqual(return_edges, [['doi_lg_1_h11', 'doi_lg_1_i'], ['doi_lg_1_h12', 'doi_lg_1_i']]) + self.assertCountEqual(cit_list, []) + + # checks if max depth/height is checked before added but citation/reference from max depth/height found and added + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Citation", True, + [pub_lg_1_i, pub_lg_1_d_11, + pub_lg_1_d_12, pub_lg_1_h_11], + [['doi_lg_1_i', 'doi_lg_1_d11'], + ['doi_lg_1_i', 'doi_lg_1_d12']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_d_11, pub_lg_1_d_12, pub_lg_1_h_11]) + self.assertCountEqual(return_edges, [['doi_lg_1_i', 'doi_lg_1_d11'], ['doi_lg_1_i', 'doi_lg_1_d12'], + ['doi_lg_1_h11', 'doi_lg_1_i']]) + self.assertCountEqual(cit_list, []) + + return_nodes, return_edges, cit_list = create_graph_structure_citations_test(pub_lg_1_i, 1, 1, "Reference", + True, [pub_lg_1_i, pub_lg_1_h_11, + pub_lg_1_h_12, + pub_lg_1_d_11], + [['doi_lg_1_h11', 'doi_lg_1_i'], + ['doi_lg_1_h12', 'doi_lg_1_i']]) + self.assertCountEqual(return_nodes, [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_h_12, pub_lg_1_d_11]) + self.assertCountEqual(return_edges, [['doi_lg_1_h11', 'doi_lg_1_i'], ['doi_lg_1_h12', 'doi_lg_1_i'], + ['doi_lg_1_i', 'doi_lg_1_d11']]) + self.assertCountEqual(cit_list, []) + + # export_to_json.py: + + def test_format_nodes(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') + pub_lg_1_h_11.group = 1 + pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') + pub_lg_1_d_11.group = -1 + + return_list_of_node_dicts = format_nodes([pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_d_11]) + check_list_of_node_dicts = [ + {"doi": 'doi_lg_1_i', "name": 'title_lg_1_i', "author": ['contributor_lg_1_i'], "year": 'date_lg_1_i', + "journal": 'journal_lg_1_i', "abstract": None, "group": 'Input', "depth": 0, "citations": 2}, + {"doi": 'doi_lg_1_h11', "name": 'title_lg_1_h11', "author": ['contributor_lg_1_h11'], + "year": 'date_lg_1_h11', "journal": 'journal_lg_1_h11', "abstract": None, "group": 'Citedby', "depth": 1, + "citations": 2}, + {"doi": 'doi_lg_1_d11', "name": 'title_lg_1_d11', "author": ['contributor_lg_1_d11'], + "year": 'date_lg_1_d11', "journal": 'journal_lg_1_d11', "abstract": None, "group": 'Reference', + "depth": -1, "citations": 1}] + + self.assertCountEqual(return_list_of_node_dicts, check_list_of_node_dicts) + + def test_format_edges(self): + return_list_of_edges = format_edges( + [['doi_lg_1_i', 'doi_lg_1_d11'], ['doi_lg_1_i', 'doi_lg_1_d12'], ['doi_lg_1_h11', 'doi_lg_1_i'], + ['doi_lg_1_h12', 'doi_lg_1_i']]) + check_list_of_edges = [{"source": 'doi_lg_1_i', "target": 'doi_lg_1_d11'}, + {"source": 'doi_lg_1_i', "target": 'doi_lg_1_d12'}, + {"source": 'doi_lg_1_h11', "target": 'doi_lg_1_i'}, + {"source": 'doi_lg_1_h12', "target": 'doi_lg_1_i'}] + + self.assertCountEqual(return_list_of_edges, check_list_of_edges) + def keep_only_dois(nodes): - ''' + ''' :param nodes: input list of nodes of type Publication :type nodes: List[Publication] - gets nodes of type pub and return only their doi + gets nodes of type pub and return only their DOI ''' - doi_list = [] - for node in nodes: - doi_list.append(node.doi_url) - return doi_list + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/verarbeitung/test/input_test.py b/verarbeitung/test/input_test.py index 352f983..7164e23 100644 --- a/verarbeitung/test/input_test.py +++ b/verarbeitung/test/input_test.py @@ -1,4 +1,5 @@ import sys + sys.path.append("../") from input.publication import Publication, Citation @@ -6,10 +7,10 @@ from input.publication import Publication, Citation def input_test_func(pub_doi): ''' - :param pub_doi: pub doi to find publication in list_of_arrays + :param pub_doi: pub DOI to find publication in list_of_arrays :type pub_doi: String - returns the publication class for given doi + returns the publication class for given DOI ''' for array in list_of_arrays: @@ -22,10 +23,10 @@ def input_test_func(pub_doi): def cit(list_doi, cit_type): ''' - :param list_doi list of citation dois to get their Citation Class + :param list_doi list of citation DOIs to get their Citation Class :type list_doi: List[String] - returns a list of citations objects for given doi list + returns a list of citations objects for given DOI list ''' cits = [] @@ -36,69 +37,112 @@ def cit(list_doi, cit_type): return cits - beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] -zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']] -zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']] +zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], + ['doiz2']] +zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], + ['doiz1']] -inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] -inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']] -inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []] +inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', + ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] +inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', + ['subject_ie2'], [], ['doi_ie1', 'doi_ie3']] +inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', + ['subject_ie3'], ['doi_ie1', 'doi_ie2'], []] right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []] -right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']] +right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], + ['doi_h1']] right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']] right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']] right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []] right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []] -right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []] +right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], + []] right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []] right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []] right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []] -large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] -large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []] -large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12','doi_cg_i'], []] -large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']] -large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']] -large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']] -large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']] -large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']] -large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']] -large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']] -large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12','doi_cg_d11']] - -large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] -large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] -large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']] -large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] -large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i','doi_cg_i'], ['doi_lg_2_h21','doi_lg_2_h22']] -large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] -large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12','doi_cg_i','doi_lg_2_h11']] -large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], ['doi_lg_2_i','doi_lg_2_d21'], ['doi_lg_2_i']] -large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d22','doi_lg_2_d23','doi_lg_2_d24'], ['doi_lg_2_h24','doi_lg_2_i']] -large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d11']] -large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] -large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] -large_graph_2_d24 = ['doi_lg_2_d24', 'title_lg_2_d24', ['contributor_lg_2_d24'], 'journal_lg_2_d24', 'date_lg_2_d24', ['subject_lg_2_d24'], [], ['doi_lg_2_d12']] - -crossed_graph_h21 = ['doi_cg_h21', 'title_cg_h21', ['contributor_cg_h21'], 'journal_cg_h21', 'date_cg_h21', ['subject_cg_h21'], ['doi_cg_h11'], []] -crossed_graph_h22 = ['doi_cg_h22', 'title_cg_h22', ['contributor_cg_h22'], 'journal_cg_h22', 'date_cg_h22', ['subject_cg_h22'], ['doi_cg_h11'], []] -crossed_graph_h11 = ['doi_cg_h11', 'title_cg_h11', ['contributor_cg_h11'], 'journal_cg_h11', 'date_cg_h11', ['subject_cg_h11'], ['doi_cg_i'], ['doi_cg_h21','doi_cg_h22']] -crossed_graph_i = ['doi_cg_i', 'title_cg_i', ['contributor_cg_i'], 'journal_cg_i', 'date_cg_i', ['subject_cg_i'], ['doi_lg_2_i','doi_cg_d11','doi_cg_d12'], ['doi_lg_1_h23','doi_cg_h11','doi_lg_2_h11']] -crossed_graph_d11 = ['doi_cg_d11', 'title_cg_d11', ['contributor_cg_d11'], 'journal_cg_d11', 'date_cg_d11', ['subject_cg_d11'], ['doi_lg_1_d23','doi_cg_d21'], ['doi_cg_i']] -crossed_graph_d12 = ['doi_cg_d12', 'title_cg_d12', ['contributor_cg_d12'], 'journal_cg_d12', 'date_cg_d12', ['subject_cg_d12'], ['doi_cg_d22'], ['doi_cg_i']] -crossed_graph_d21 = ['doi_cg_d21', 'title_cg_d21', ['contributor_cg_d21'], 'journal_cg_d21', 'date_cg_d21', ['subject_cg_d21'], [], ['doi_cg_d11']] -crossed_graph_d22 = ['doi_cg_d22', 'title_cg_d22', ['contributor_cg_d22'], 'journal_cg_d22', 'date_cg_d22', ['subject_cg_d22'], [], ['doi_cg_d12']] - - -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, - right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, - large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, - large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, - large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23, large_graph_2_d24, crossed_graph_h21, crossed_graph_h22, crossed_graph_h11, - crossed_graph_i, crossed_graph_d11, crossed_graph_d12, crossed_graph_d21, crossed_graph_d22] +large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', + ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] +large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', + ['subject_lg_1_h22'], ['doi_lg_1_h11', 'doi_lg_1_h12'], []] +large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', + ['subject_lg_1_h23'], ['doi_lg_1_h12', 'doi_cg_i'], []] +large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', + ['subject_lg_1_h11'], ['doi_lg_1_i'], ['doi_lg_1_h21', 'doi_lg_1_h22']] +large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', + ['subject_lg_1_h12'], ['doi_lg_1_i', 'doi_lg_1_d12'], ['doi_lg_1_h22', 'doi_lg_1_h23']] +large_graph_1_i = ['doi_lg_1_i', 'title_lg_1_i', ['contributor_lg_1_i'], 'journal_lg_1_i', 'date_lg_1_i', + ['subject_lg_1_i'], ['doi_lg_1_d11', 'doi_lg_1_d12'], ['doi_lg_1_h11', 'doi_lg_1_h12']] +large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', + ['subject_lg_1_d11'], ['doi_lg_1_d21', 'doi_lg_1_d22'], ['doi_lg_1_i']] +large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', + ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12', 'doi_lg_1_i']] +large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', + ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11', 'doi_lg_1_d22']] +large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', + ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11', 'doi_lg_1_d21']] +large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', + ['subject_lg_1_d23'], [], ['doi_lg_1_d12', 'doi_cg_d11']] + +large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', + ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] +large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', + ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] +large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', + ['subject_lg_2_h23'], ['doi_lg_2_h12', 'doi_lg_2_h24'], ['doi_lg_2_h24']] +large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', + ['subject_lg_2_h24'], ['doi_lg_2_h12', 'doi_lg_2_h23', 'doi_lg_2_d12'], ['doi_lg_2_h23']] +large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', + ['subject_lg_2_h11'], ['doi_lg_2_i', 'doi_cg_i'], ['doi_lg_2_h21', 'doi_lg_2_h22']] +large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', + ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23', 'doi_lg_2_h24']] +large_graph_2_i = ['doi_lg_2_i', 'title_lg_2_i', ['contributor_lg_2_i'], 'journal_lg_2_i', 'date_lg_2_i', + ['subject_lg_2_i'], ['doi_lg_2_d11', 'doi_lg_2_d12'], + ['doi_lg_2_h11', 'doi_lg_2_h12', 'doi_cg_i', 'doi_lg_2_h11']] +large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', + ['subject_lg_2_d11'], ['doi_lg_2_i', 'doi_lg_2_d21'], ['doi_lg_2_i']] +large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', + ['subject_lg_2_d12'], ['doi_lg_2_d22', 'doi_lg_2_d23', 'doi_lg_2_d24'], + ['doi_lg_2_h24', 'doi_lg_2_i']] +large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', + ['subject_lg_2_d21'], [], ['doi_lg_2_d11']] +large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', + ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] +large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', + ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] +large_graph_2_d24 = ['doi_lg_2_d24', 'title_lg_2_d24', ['contributor_lg_2_d24'], 'journal_lg_2_d24', 'date_lg_2_d24', + ['subject_lg_2_d24'], [], ['doi_lg_2_d12']] + +crossed_graph_h21 = ['doi_cg_h21', 'title_cg_h21', ['contributor_cg_h21'], 'journal_cg_h21', 'date_cg_h21', + ['subject_cg_h21'], ['doi_cg_h11'], []] +crossed_graph_h22 = ['doi_cg_h22', 'title_cg_h22', ['contributor_cg_h22'], 'journal_cg_h22', 'date_cg_h22', + ['subject_cg_h22'], ['doi_cg_h11'], []] +crossed_graph_h11 = ['doi_cg_h11', 'title_cg_h11', ['contributor_cg_h11'], 'journal_cg_h11', 'date_cg_h11', + ['subject_cg_h11'], ['doi_cg_i'], ['doi_cg_h21', 'doi_cg_h22']] +crossed_graph_i = ['doi_cg_i', 'title_cg_i', ['contributor_cg_i'], 'journal_cg_i', 'date_cg_i', ['subject_cg_i'], + ['doi_lg_2_i', 'doi_cg_d11', 'doi_cg_d12'], ['doi_lg_1_h23', 'doi_cg_h11', 'doi_lg_2_h11']] +crossed_graph_d11 = ['doi_cg_d11', 'title_cg_d11', ['contributor_cg_d11'], 'journal_cg_d11', 'date_cg_d11', + ['subject_cg_d11'], ['doi_lg_1_d23', 'doi_cg_d21'], ['doi_cg_i']] +crossed_graph_d12 = ['doi_cg_d12', 'title_cg_d12', ['contributor_cg_d12'], 'journal_cg_d12', 'date_cg_d12', + ['subject_cg_d12'], ['doi_cg_d22'], ['doi_cg_i']] +crossed_graph_d21 = ['doi_cg_d21', 'title_cg_d21', ['contributor_cg_d21'], 'journal_cg_d21', 'date_cg_d21', + ['subject_cg_d21'], [], ['doi_cg_d11']] +crossed_graph_d22 = ['doi_cg_d22', 'title_cg_d22', ['contributor_cg_d22'], 'journal_cg_d22', 'date_cg_d22', + ['subject_cg_d22'], [], ['doi_cg_d12']] + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, + right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, + right_depth02, right_depth1, right_depth2, right_depth3, + large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, + large_graph_1_i, large_graph_1_d11, large_graph_1_d12, + large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, + large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, + large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, + large_graph_2_d23, large_graph_2_d24, crossed_graph_h21, crossed_graph_h22, crossed_graph_h11, + crossed_graph_i, crossed_graph_d11, crossed_graph_d12, crossed_graph_d21, crossed_graph_d22] diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py index 62504e5..1ebb0d1 100644 --- a/verarbeitung/test/update_graph_unittest.py +++ b/verarbeitung/test/update_graph_unittest.py @@ -1,9 +1,8 @@ import unittest -import sys +import sys from pathlib import Path - sys.path.append("../") from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction @@ -14,382 +13,405 @@ from verarbeitung.update_graph.update_depth import reduce_max_height_depth_test, from verarbeitung.update_graph.update_edges import back_to_valid_edges from verarbeitung.update_graph.delete_nodes_edges import search_ref_cit_graph_rec_test from verarbeitung.update_graph.compare_old_and_new_node_lists import compare_old_and_new_node_lists -from verarbeitung.update_graph.connect_new_input import find_furthermost_citations_test, complete_changed_group_nodes_test +from verarbeitung.update_graph.connect_new_input import find_furthermost_citations_test, \ + complete_changed_group_nodes_test from verarbeitung.get_pub_from_input import input_test_func + class UpdatingTest(unittest.TestCase): - maxDiff = None - - def test_deleted_input_dois(self): - nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) - nodes_old_both, edges_old_both, err_list = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) - output_to_json(nodes_old_both, edges_old_both, 2, 2, test_var=True) - nodes_new_single, edges_new_single, err_list = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) - self.assertCountEqual(nodes_old_single,nodes_new_single) - self.assertCountEqual(edges_old_single, edges_new_single) - - nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_cg_i'],3,3,True) - nodes_old_two, edges_old_two, err_list = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True) - nodes_old_three, edges_old_three, err_list = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) - - def test_new_height(self): - nodes_height_0, edges_height_0, err_list = init_graph_construction(['doi_lg_1_i'],2,0,True) - nodes_height_1, edges_height_1, err_list = init_graph_construction(['doi_lg_1_i'],2,1,True) - nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) - - output_to_json(nodes_height_2, edges_height_2, 2, 2, 'new_height.json', True) - nodes_new_height_1, edges_new_height_1, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) - self.assertCountEqual(nodes_height_1, nodes_new_height_1) - self.assertCountEqual(edges_height_1, edges_new_height_1) - - nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) - output_to_json(nodes_height_2, edges_height_2, 2, 2, 'new_height.json', True) - nodes_new_height_0, edges_new_height_0, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) - self.assertCountEqual(nodes_height_0, nodes_new_height_0) - self.assertCountEqual(edges_height_0, edges_new_height_0) - - def test_ref_to_input(self): - nodes, edges, err_list = init_graph_construction(['doi_cg_i'], 2, 2, True) - nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11'], 2, 2, True) - output_to_json(nodes, edges, 2, 2, 'ref_to_input.json') - new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11'], 'ref_to_input.json', 2, 2, True) - self.assertCountEqual(new_nodes, nodes_2) - self.assertCountEqual(new_edges, edges_2) - - nodes, edges, err_list = init_graph_construction(['doi_cg_i','doi_lg_2_i'], 2, 2, True) - nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11','doi_lg_2_i'], 2, 2, True) - - output_to_json(nodes, edges, 2, 2, 'ref_to_input.json') - new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) - self.assertCountEqual(new_nodes, nodes_2) - self.assertCountEqual(new_edges, edges_2) - - output_to_json(nodes_2, edges_2, 2, 2, 'ref_to_input.json') - new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) - self.assertCountEqual(new_nodes, nodes_2) - self.assertCountEqual(new_edges, edges_2) - - nodes, edges, err_list = init_graph_construction(['doi_cg_i','doi_lg_2_i'], 2, 2, True) - nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_i','doi_lg_2_h11','doi_lg_1_i'], 3, 3, True) - - output_to_json(nodes_2, edges_2, 2, 2, 'ref_to_input.json') - new_nodes, new_edges, new_err_list = update_graph(['doi_cg_i','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) - self.assertCountEqual(new_nodes, nodes) - self.assertCountEqual(new_edges, edges) - - - - - - - ## Ab hier die Tests für die einzelnen Funktionen ## - - # update_graph.py: - - def test_get_old_input_dois(self): - pub_lg_1_i = input_test_func('doi_lg_1_i') - pub_lg_1_i.group = 0 - pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') - pub_lg_1_h_11.group = 1 - pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') - pub_lg_1_d_11.group = -1 - old_pubs = [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_d_11] - self.assertCountEqual(get_old_input_dois(old_pubs),['doi_lg_1_i']) - - # hard to test because we only have dois as test objects and no urls variant - def test_get_new_input_dois(self): - new_dois = ['doi_lg_2_i', 'doi_lg_1_i', 'doi_cg_i'] - self.assertCountEqual(get_new_input_dois(new_dois, True), ['doi_lg_2_i', 'doi_lg_1_i', 'doi_cg_i']) - - - # update_depth.py: - - def test_reduce_max_height(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h_11.group = 1 - pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d_11.group = -1 - pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h_21.group = 2 - pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d_21.group = -2 - pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21] - self.assertCountEqual(reduce_max_height_depth_test(pubs, 2, "Height"), [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21]) - self.assertCountEqual(reduce_max_height_depth_test(pubs, 1, "Height"), [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_d_11, pub_lg_2_d_21]) - self.assertCountEqual(reduce_max_height_depth_test(pubs, 0, "Height"), [pub_lg_2_i, pub_lg_2_d_11, pub_lg_2_d_21]) - - def test_reduce_max_depth(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h_11.group = 1 - pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d_11.group = -1 - pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h_21.group = 2 - pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d_21.group = -2 - pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21] - self.assertCountEqual(reduce_max_height_depth_test(pubs, 2, "Depth"), [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21]) - self.assertCountEqual(reduce_max_height_depth_test(pubs, 1, "Depth"), [pub_lg_2_i, pub_lg_2_d_11, pub_lg_2_h_11, pub_lg_2_h_21]) - self.assertCountEqual(reduce_max_height_depth_test(pubs, 0, "Depth"), [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21]) - - def test_get_old_max_references(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h_11.group = 1 - pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d_11.group = -1 - pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h_21.group = 2 - pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d_21.group = -2 - pub_lg_2_d_22 = input_test_func('doi_lg_2_d22') - pub_lg_2_d_22.group = -2 - pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21, pub_lg_2_d_22] - self.assertCountEqual(get_old_max_references_citations_test(pubs, 2, "Depth"), [pub_lg_2_d_21,pub_lg_2_d_22]) - - def test_get_old_max_citations(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h_11.group = 1 - pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d_11.group = -1 - pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h_21.group = 2 - pub_lg_2_h_22 = input_test_func('doi_lg_2_h22') - pub_lg_2_h_22.group = 2 - pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d_21.group = -2 - pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_h_22,pub_lg_2_d_11, pub_lg_2_d_21] - self.assertCountEqual(get_old_max_references_citations_test(pubs, 2, "Height"), [pub_lg_2_h_21,pub_lg_2_h_22]) - - # import_from_json.py: - - def test_input_from_json(self): - nodes_old, edges_old, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) - output_to_json(nodes_old, edges_old, 2, 2, test_var = True) - nodes_new, edges_new, old_depth, old_height = input_from_json('test_output.json') - self.assertCountEqual(nodes_old,nodes_new) - self.assertCountEqual(edges_old, edges_new) - - # update_edges.py: - - def test_back_to_valid_edges(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h_11.group = 1 - pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d_11.group = -1 - pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_d_11] - edges = [['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11'],['doi_lg_2_h21','doi_lg_2_h11'],['doi_lg_2_i','doi_lg_2_d21']] - back_to_valid_edges(edges, pubs) - self.assertCountEqual([['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11']],edges) - - # delete_nodes_edges.py: - - def test_search_ref_graph_rec(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h11.group = 1 - pub_lg_2_h12 = input_test_func('doi_lg_2_h12') - pub_lg_2_h12.group = 1 - pub_lg_2_d11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d11.group = -1 - pub_lg_2_d12 = input_test_func('doi_lg_2_d12') - pub_lg_2_d12.group = -1 - pub_lg_2_h21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h21.group = 2 - pub_lg_2_h22 = input_test_func('doi_lg_2_h22') - pub_lg_2_h22.group = 2 - pub_lg_2_d21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d21.group = -2 - - pub_cg_i = input_test_func('doi_cg_i') - pub_cg_i.group = 0 - pub_cg_h11 = input_test_func('doi_cg_h11') - pub_cg_h11.group = 1 - pub_cg_d12 = input_test_func('doi_cg_d11') - pub_cg_d12.group = -1 - pub_cg_d11 = input_test_func('doi_cg_d12') - pub_cg_d11.group = -1 - pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_cg_i, pub_cg_d11, pub_cg_d12, pub_cg_h11] - usable_nodes = search_ref_cit_graph_rec_test(pubs, [pub_cg_i], 2, "Citation") - self.assertCountEqual(usable_nodes, [pub_cg_h11, pub_lg_2_h11, pub_lg_2_h21, pub_lg_2_h22]) - - # compare_old_and_new_node_lists.py: - - def test_compare_old_and_new_nodes(self): - old_input = ['doi_lg_1_i', 'doi_lg_2_i'] - new_input = ['doi_lg_1_i', 'doi_cg_i'] - common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_input, new_input) - self.assertCountEqual(common_nodes, ['doi_lg_1_i']) - self.assertCountEqual(inserted_nodes, ['doi_cg_i']) - self.assertCountEqual(deleted_nodes, ['doi_lg_2_i']) - - - # connect_new_input.py: - - def test_find_furthermost_citations(self): - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h11.group = 1 - pub_lg_2_h12 = input_test_func('doi_lg_2_h12') - pub_lg_2_h12.group = 1 - pub_lg_2_d11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d11.group = -1 - pub_lg_2_d12 = input_test_func('doi_lg_2_d12') - pub_lg_2_d12.group = -1 - pub_lg_2_h21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h21.group = 2 - pub_lg_2_h22 = input_test_func('doi_lg_2_h22') - pub_lg_2_h22.group = 2 - pub_lg_2_d21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d21.group = -2 - pub_lg_2_d22 = input_test_func('doi_lg_2_d22') - pub_lg_2_d22.group = -2 - pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_lg_2_d22] - - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, 1, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) - - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, 1, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) - - - def test_complete_changed_group_nodes(self): - pub_cg_i = input_test_func('doi_cg_i') - pub_cg_i.group = 0 - pub_cg_h11 = input_test_func('doi_cg_h11') - pub_cg_h11.group = 1 - pub_cg_h21 = input_test_func('doi_cg_h21') - pub_cg_h21.group = 2 - pub_cg_h22 = input_test_func('doi_cg_h22') - pub_cg_h22.group = 2 - pub_cg_d11 = input_test_func('doi_cg_d11') - pub_cg_d11.group = -1 - pub_cg_d12 = input_test_func('doi_cg_d12') - pub_cg_d12.group = -1 - pub_cg_d21 = input_test_func('doi_cg_d21') - pub_cg_d21.group = -2 - pub_cg_d22 = input_test_func('doi_cg_d22') - pub_cg_d22.group = -2 - - pub_lg_1_h23 = input_test_func('doi_lg_1_h23') - pub_lg_1_h23.group = 2 - pub_lg_1_d23 = input_test_func('doi_lg_1_d23') - pub_lg_1_d23.group = -2 - - pub_lg_2_i = input_test_func('doi_lg_2_i') - pub_lg_2_i.group = 0 - pub_lg_2_h11 = input_test_func('doi_lg_2_h11') - pub_lg_2_h11.group = 1 - pub_lg_2_h21 = input_test_func('doi_lg_2_h21') - pub_lg_2_h21.group = 2 - pub_lg_2_h22 = input_test_func('doi_lg_2_h22') - pub_lg_2_h22.group = 2 - pub_lg_2_d11 = input_test_func('doi_lg_2_d11') - pub_lg_2_d11.group = -1 - pub_lg_2_d12 = input_test_func('doi_lg_2_d12') - pub_lg_2_d12.group = -1 - pub_lg_2_d21 = input_test_func('doi_lg_2_d21') - pub_lg_2_d21.group = -2 - pub_lg_2_d22 = input_test_func('doi_lg_2_d22') - pub_lg_2_d22.group = -2 - pub_lg_2_d23 = input_test_func('doi_lg_2_d23') - pub_lg_2_d23.group = -2 - pub_lg_2_d24 = input_test_func('doi_lg_2_d24') - pub_lg_2_d24.group = -2 - - - moved_1_pub_cg_i = input_test_func('doi_cg_i') - moved_1_pub_cg_i.group = 1 - moved_1_pub_cg_h11 = input_test_func('doi_cg_h11') - moved_1_pub_cg_h11.group = 2 - moved_1_pub_cg_h21 = input_test_func('doi_cg_h21') - moved_1_pub_cg_h21.group = 3 - moved_1_pub_cg_h22 = input_test_func('doi_cg_h22') - moved_1_pub_cg_h22.group = 3 - moved_1_pub_cg_d11 = input_test_func('doi_cg_d11') - moved_1_pub_cg_d11.group = 0 - - moved_1_pub_cg_d21 = input_test_func('doi_cg_d21') - moved_1_pub_cg_d21.group = -1 - - moved_1_pub_lg_1_h23 = input_test_func('doi_lg_1_h23') - moved_1_pub_lg_1_h23.group = 2 - moved_1_pub_lg_1_d23 = input_test_func('doi_lg_1_d23') - moved_1_pub_lg_1_d23.group = -1 - - moved_1_pub_lg_2_h11 = input_test_func('doi_lg_2_h11') - moved_1_pub_lg_2_h11.group = 1 - - - moved_2_pub_cg_i = input_test_func('doi_cg_i') - moved_2_pub_cg_i.group = -1 - moved_2_pub_cg_d11 = input_test_func('doi_cg_d11') - moved_2_pub_cg_d11.group = -2 - moved_2_pub_cg_d12 = input_test_func('doi_cg_d12') - moved_2_pub_cg_d12.group = -2 - moved_2_pub_cg_d21 = input_test_func('doi_cg_d21') - moved_2_pub_cg_d21.group = -3 - moved_2_pub_cg_d22 = input_test_func('doi_cg_d22') - moved_2_pub_cg_d22.group = -3 - - moved_2_pub_lg_1_d23 = input_test_func('doi_lg_1_d23') - moved_2_pub_lg_1_d23.group = -3 - - moved_2_pub_lg_2_h21 = input_test_func('doi_lg_2_h21') - moved_2_pub_lg_2_h21.group = 1 - moved_2_pub_lg_2_h22 = input_test_func('doi_lg_2_h22') - moved_2_pub_lg_2_h22.group = 1 - moved_2_pub_lg_2_h11 = input_test_func('doi_lg_2_h11') - moved_2_pub_lg_2_h11.group = 0 - moved_2_pub_lg_2_i = input_test_func('doi_lg_2_i') - moved_2_pub_lg_2_i.group = -2 - moved_2_pub_lg_2_d11 = input_test_func('doi_lg_2_d11') - moved_2_pub_lg_2_d11.group = -2 - moved_2_pub_lg_2_d12 = input_test_func('doi_lg_2_d12') - moved_2_pub_lg_2_d12.group = -2 - moved_2_pub_lg_2_d21 = input_test_func('doi_lg_2_d21') - moved_2_pub_lg_2_d21.group = -3 - moved_2_pub_lg_2_d22 = input_test_func('doi_lg_2_d22') - moved_2_pub_lg_2_d22.group = -3 - moved_2_pub_lg_2_d23 = input_test_func('doi_lg_2_d23') - moved_2_pub_lg_2_d23.group = -3 - moved_2_pub_lg_2_d24 = input_test_func('doi_lg_2_d24') - moved_2_pub_lg_2_d24.group = -3 - - pubs = [pub_cg_i, pub_cg_h11, pub_cg_h21, pub_cg_h22, pub_cg_d11, pub_cg_d12, pub_cg_d21, pub_cg_d22, pub_lg_1_h23, pub_lg_1_d23, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_h11, pub_lg_2_i, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_d21, pub_lg_2_d22, pub_lg_2_d23, pub_lg_2_d24] - edges = [] - nodes, edges, handled_nodes = complete_changed_group_nodes_test(pubs, edges, 'doi_cg_d11', 2, 2, 2, 2) - self.assertCountEqual(nodes, [moved_1_pub_cg_d11, moved_1_pub_cg_d21, moved_1_pub_lg_1_d23, moved_1_pub_cg_i, moved_1_pub_lg_1_h23, moved_1_pub_cg_h11, moved_1_pub_lg_2_h11]) - self.assertCountEqual(edges, [['doi_cg_d11','doi_lg_1_d23'],['doi_cg_d11','doi_cg_d21'],['doi_cg_i','doi_cg_d11'],['doi_lg_1_h23','doi_cg_i'],['doi_cg_h11','doi_cg_i'],['doi_lg_2_h11','doi_cg_i']]) - - nodes, edges, handled_nodes = complete_changed_group_nodes_test(pubs, edges, 'doi_lg_2_h11', 2, 2, 3, 3) - self.assertCountEqual(nodes, [moved_2_pub_cg_i, moved_2_pub_cg_d11, moved_2_pub_lg_1_d23, moved_2_pub_cg_d21, moved_2_pub_cg_d12, moved_2_pub_cg_d22, moved_2_pub_lg_2_h21, moved_2_pub_lg_2_h22, moved_2_pub_lg_2_h11, moved_2_pub_lg_2_i, moved_2_pub_lg_2_d11, moved_2_pub_lg_2_d21, moved_2_pub_lg_2_d12, moved_2_pub_lg_2_d22, moved_2_pub_lg_2_d23, moved_2_pub_lg_2_d24]) - self.assertCountEqual(edges, [['doi_cg_d11','doi_lg_1_d23'],['doi_cg_d11','doi_cg_d21'],['doi_cg_i','doi_cg_d11'],['doi_cg_i','doi_cg_d12'],['doi_cg_d12','doi_cg_d22'],['doi_lg_2_h11','doi_cg_i'],['doi_cg_i','doi_lg_2_i'],['doi_lg_2_h21','doi_lg_2_h11'],['doi_lg_2_h22','doi_lg_2_h11'],['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11'],['doi_lg_2_d11','doi_lg_2_i'],['doi_lg_2_d11','doi_lg_2_d21'],['doi_lg_2_i','doi_lg_2_d12'],['doi_lg_2_d12','doi_lg_2_d22'],['doi_lg_2_d12','doi_lg_2_d23'],['doi_lg_2_d12','doi_lg_2_d24']]) + maxDiff = None + + def test_deleted_input_dois(self): + nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_lg_1_i'], 2, 2, True) + nodes_old_both, edges_old_both, err_list = init_graph_construction(['doi_lg_1_i', 'doi_lg_2_i'], 2, 2, True) + output_to_json(nodes_old_both, edges_old_both, 2, 2, test_var=True) + nodes_new_single, edges_new_single, err_list = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) + self.assertCountEqual(nodes_old_single, nodes_new_single) + self.assertCountEqual(edges_old_single, edges_new_single) + + nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_cg_i'], 3, 3, True) + nodes_old_two, edges_old_two, err_list = init_graph_construction(['doi_lg_1_i', 'doi_cg_i'], 3, 3, True) + nodes_old_three, edges_old_three, err_list = init_graph_construction(['doi_lg_1_i', 'doi_lg_2_i', 'doi_cg_i'], + 3, 3, True) + + def test_new_height(self): + nodes_height_0, edges_height_0, err_list = init_graph_construction(['doi_lg_1_i'], 2, 0, True) + nodes_height_1, edges_height_1, err_list = init_graph_construction(['doi_lg_1_i'], 2, 1, True) + nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'], 2, 2, True) + + output_to_json(nodes_height_2, edges_height_2, 2, 2, 'new_height.json', True) + nodes_new_height_1, edges_new_height_1, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) + self.assertCountEqual(nodes_height_1, nodes_new_height_1) + self.assertCountEqual(edges_height_1, edges_new_height_1) + + nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'], 2, 2, True) + output_to_json(nodes_height_2, edges_height_2, 2, 2, 'new_height.json', True) + nodes_new_height_0, edges_new_height_0, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) + self.assertCountEqual(nodes_height_0, nodes_new_height_0) + self.assertCountEqual(edges_height_0, edges_new_height_0) + + def test_ref_to_input(self): + nodes, edges, err_list = init_graph_construction(['doi_cg_i'], 2, 2, True) + nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11'], 2, 2, True) + output_to_json(nodes, edges, 2, 2, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes_2) + self.assertCountEqual(new_edges, edges_2) + + nodes, edges, err_list = init_graph_construction(['doi_cg_i', 'doi_lg_2_i'], 2, 2, True) + nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11', 'doi_lg_2_i'], 2, 2, True) + + output_to_json(nodes, edges, 2, 2, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11', 'doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes_2) + self.assertCountEqual(new_edges, edges_2) + + output_to_json(nodes_2, edges_2, 2, 2, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11', 'doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes_2) + self.assertCountEqual(new_edges, edges_2) + + nodes, edges, err_list = init_graph_construction(['doi_cg_i', 'doi_lg_2_i'], 2, 2, True) + nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_i', 'doi_lg_2_h11', 'doi_lg_1_i'], 3, 3, True) + + output_to_json(nodes_2, edges_2, 2, 2, 'ref_to_input.json') + new_nodes, new_edges, new_err_list = update_graph(['doi_cg_i', 'doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) + self.assertCountEqual(new_nodes, nodes) + self.assertCountEqual(new_edges, edges) + + ## From here the tests for the individual functions ## + + # update_graph.py: + + def test_get_old_input_dois(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + pub_lg_1_h_11 = input_test_func('doi_lg_1_h11') + pub_lg_1_h_11.group = 1 + pub_lg_1_d_11 = input_test_func('doi_lg_1_d11') + pub_lg_1_d_11.group = -1 + old_pubs = [pub_lg_1_i, pub_lg_1_h_11, pub_lg_1_d_11] + self.assertCountEqual(get_old_input_dois(old_pubs), ['doi_lg_1_i']) + + # hard to test because we only have DOIs as test objects and no urls variant + def test_get_new_input_dois(self): + new_dois = ['doi_lg_2_i', 'doi_lg_1_i', 'doi_cg_i'] + self.assertCountEqual(get_new_input_dois(new_dois, True), ['doi_lg_2_i', 'doi_lg_1_i', 'doi_cg_i']) + + # update_depth.py: + + def test_reduce_max_height(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h_11.group = 1 + pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d_11.group = -1 + pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h_21.group = 2 + pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d_21.group = -2 + pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21] + self.assertCountEqual(reduce_max_height_depth_test(pubs, 2, "Height"), + [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21]) + self.assertCountEqual(reduce_max_height_depth_test(pubs, 1, "Height"), + [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_d_11, pub_lg_2_d_21]) + self.assertCountEqual(reduce_max_height_depth_test(pubs, 0, "Height"), + [pub_lg_2_i, pub_lg_2_d_11, pub_lg_2_d_21]) + + def test_reduce_max_depth(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h_11.group = 1 + pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d_11.group = -1 + pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h_21.group = 2 + pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d_21.group = -2 + pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21] + self.assertCountEqual(reduce_max_height_depth_test(pubs, 2, "Depth"), + [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21]) + self.assertCountEqual(reduce_max_height_depth_test(pubs, 1, "Depth"), + [pub_lg_2_i, pub_lg_2_d_11, pub_lg_2_h_11, pub_lg_2_h_21]) + self.assertCountEqual(reduce_max_height_depth_test(pubs, 0, "Depth"), + [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21]) + + def test_get_old_max_references(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h_11.group = 1 + pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d_11.group = -1 + pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h_21.group = 2 + pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d_21.group = -2 + pub_lg_2_d_22 = input_test_func('doi_lg_2_d22') + pub_lg_2_d_22.group = -2 + pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_d_11, pub_lg_2_d_21, pub_lg_2_d_22] + self.assertCountEqual(get_old_max_references_citations_test(pubs, 2, "Depth"), [pub_lg_2_d_21, pub_lg_2_d_22]) + + def test_get_old_max_citations(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h_11.group = 1 + pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d_11.group = -1 + pub_lg_2_h_21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h_21.group = 2 + pub_lg_2_h_22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h_22.group = 2 + pub_lg_2_d_21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d_21.group = -2 + pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_h_22, pub_lg_2_d_11, pub_lg_2_d_21] + self.assertCountEqual(get_old_max_references_citations_test(pubs, 2, "Height"), [pub_lg_2_h_21, pub_lg_2_h_22]) + + # import_from_json.py: + + def test_input_from_json(self): + nodes_old, edges_old, err_list = init_graph_construction(['doi_lg_1_i'], 2, 2, True) + output_to_json(nodes_old, edges_old, 2, 2, test_var=True) + nodes_new, edges_new, old_depth, old_height = input_from_json('test_output.json') + self.assertCountEqual(nodes_old, nodes_new) + self.assertCountEqual(edges_old, edges_new) + + # update_edges.py: + + def test_back_to_valid_edges(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h_11.group = 1 + pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d_11.group = -1 + pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_d_11] + edges = [['doi_lg_2_h11', 'doi_lg_2_i'], ['doi_lg_2_i', 'doi_lg_2_d11'], ['doi_lg_2_h21', 'doi_lg_2_h11'], + ['doi_lg_2_i', 'doi_lg_2_d21']] + back_to_valid_edges(edges, pubs) + self.assertCountEqual([['doi_lg_2_h11', 'doi_lg_2_i'], ['doi_lg_2_i', 'doi_lg_2_d11']], edges) + + # delete_nodes_edges.py: + + def test_search_ref_graph_rec(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h11.group = 1 + pub_lg_2_h12 = input_test_func('doi_lg_2_h12') + pub_lg_2_h12.group = 1 + pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d11.group = -1 + pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + pub_lg_2_d12.group = -1 + pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h21.group = 2 + pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h22.group = 2 + pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d21.group = -2 + + pub_cg_i = input_test_func('doi_cg_i') + pub_cg_i.group = 0 + pub_cg_h11 = input_test_func('doi_cg_h11') + pub_cg_h11.group = 1 + pub_cg_d12 = input_test_func('doi_cg_d11') + pub_cg_d12.group = -1 + pub_cg_d11 = input_test_func('doi_cg_d12') + pub_cg_d11.group = -1 + pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, + pub_lg_2_d21, pub_cg_i, pub_cg_d11, pub_cg_d12, pub_cg_h11] + usable_nodes = search_ref_cit_graph_rec_test(pubs, [pub_cg_i], 2, "Citation") + self.assertCountEqual(usable_nodes, [pub_cg_h11, pub_lg_2_h11, pub_lg_2_h21, pub_lg_2_h22]) + + # compare_old_and_new_node_lists.py: + + def test_compare_old_and_new_nodes(self): + old_input = ['doi_lg_1_i', 'doi_lg_2_i'] + new_input = ['doi_lg_1_i', 'doi_cg_i'] + common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_input, new_input) + self.assertCountEqual(common_nodes, ['doi_lg_1_i']) + self.assertCountEqual(inserted_nodes, ['doi_cg_i']) + self.assertCountEqual(deleted_nodes, ['doi_lg_2_i']) + + # connect_new_input.py: + + def test_find_furthermost_citations(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h11.group = 1 + pub_lg_2_h12 = input_test_func('doi_lg_2_h12') + pub_lg_2_h12.group = 1 + pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d11.group = -1 + pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + pub_lg_2_d12.group = -1 + pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h21.group = 2 + pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h22.group = 2 + pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d21.group = -2 + pub_lg_2_d22 = input_test_func('doi_lg_2_d22') + pub_lg_2_d22.group = -2 + pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, + pub_lg_2_d21, pub_lg_2_d22] + + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, 2, "Citation"), + [pub_lg_2_h21, pub_lg_2_h22]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, 1, "Citation"), + [pub_lg_2_h21, pub_lg_2_h22]) + + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, 2, "Reference"), + [pub_lg_2_d21, pub_lg_2_i]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, 1, "Reference"), + [pub_lg_2_d21, pub_lg_2_i]) + + def test_complete_changed_group_nodes(self): + pub_cg_i = input_test_func('doi_cg_i') + pub_cg_i.group = 0 + pub_cg_h11 = input_test_func('doi_cg_h11') + pub_cg_h11.group = 1 + pub_cg_h21 = input_test_func('doi_cg_h21') + pub_cg_h21.group = 2 + pub_cg_h22 = input_test_func('doi_cg_h22') + pub_cg_h22.group = 2 + pub_cg_d11 = input_test_func('doi_cg_d11') + pub_cg_d11.group = -1 + pub_cg_d12 = input_test_func('doi_cg_d12') + pub_cg_d12.group = -1 + pub_cg_d21 = input_test_func('doi_cg_d21') + pub_cg_d21.group = -2 + pub_cg_d22 = input_test_func('doi_cg_d22') + pub_cg_d22.group = -2 + + pub_lg_1_h23 = input_test_func('doi_lg_1_h23') + pub_lg_1_h23.group = 2 + pub_lg_1_d23 = input_test_func('doi_lg_1_d23') + pub_lg_1_d23.group = -2 + + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h11.group = 1 + pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h21.group = 2 + pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h22.group = 2 + pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d11.group = -1 + pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + pub_lg_2_d12.group = -1 + pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d21.group = -2 + pub_lg_2_d22 = input_test_func('doi_lg_2_d22') + pub_lg_2_d22.group = -2 + pub_lg_2_d23 = input_test_func('doi_lg_2_d23') + pub_lg_2_d23.group = -2 + pub_lg_2_d24 = input_test_func('doi_lg_2_d24') + pub_lg_2_d24.group = -2 + + moved_1_pub_cg_i = input_test_func('doi_cg_i') + moved_1_pub_cg_i.group = 1 + moved_1_pub_cg_h11 = input_test_func('doi_cg_h11') + moved_1_pub_cg_h11.group = 2 + moved_1_pub_cg_h21 = input_test_func('doi_cg_h21') + moved_1_pub_cg_h21.group = 3 + moved_1_pub_cg_h22 = input_test_func('doi_cg_h22') + moved_1_pub_cg_h22.group = 3 + moved_1_pub_cg_d11 = input_test_func('doi_cg_d11') + moved_1_pub_cg_d11.group = 0 + + moved_1_pub_cg_d21 = input_test_func('doi_cg_d21') + moved_1_pub_cg_d21.group = -1 + + moved_1_pub_lg_1_h23 = input_test_func('doi_lg_1_h23') + moved_1_pub_lg_1_h23.group = 2 + moved_1_pub_lg_1_d23 = input_test_func('doi_lg_1_d23') + moved_1_pub_lg_1_d23.group = -1 + + moved_1_pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + moved_1_pub_lg_2_h11.group = 1 + + moved_2_pub_cg_i = input_test_func('doi_cg_i') + moved_2_pub_cg_i.group = -1 + moved_2_pub_cg_d11 = input_test_func('doi_cg_d11') + moved_2_pub_cg_d11.group = -2 + moved_2_pub_cg_d12 = input_test_func('doi_cg_d12') + moved_2_pub_cg_d12.group = -2 + moved_2_pub_cg_d21 = input_test_func('doi_cg_d21') + moved_2_pub_cg_d21.group = -3 + moved_2_pub_cg_d22 = input_test_func('doi_cg_d22') + moved_2_pub_cg_d22.group = -3 + + moved_2_pub_lg_1_d23 = input_test_func('doi_lg_1_d23') + moved_2_pub_lg_1_d23.group = -3 + + moved_2_pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + moved_2_pub_lg_2_h21.group = 1 + moved_2_pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + moved_2_pub_lg_2_h22.group = 1 + moved_2_pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + moved_2_pub_lg_2_h11.group = 0 + moved_2_pub_lg_2_i = input_test_func('doi_lg_2_i') + moved_2_pub_lg_2_i.group = -2 + moved_2_pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + moved_2_pub_lg_2_d11.group = -2 + moved_2_pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + moved_2_pub_lg_2_d12.group = -2 + moved_2_pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + moved_2_pub_lg_2_d21.group = -3 + moved_2_pub_lg_2_d22 = input_test_func('doi_lg_2_d22') + moved_2_pub_lg_2_d22.group = -3 + moved_2_pub_lg_2_d23 = input_test_func('doi_lg_2_d23') + moved_2_pub_lg_2_d23.group = -3 + moved_2_pub_lg_2_d24 = input_test_func('doi_lg_2_d24') + moved_2_pub_lg_2_d24.group = -3 + + pubs = [pub_cg_i, pub_cg_h11, pub_cg_h21, pub_cg_h22, pub_cg_d11, pub_cg_d12, pub_cg_d21, pub_cg_d22, + pub_lg_1_h23, pub_lg_1_d23, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_h11, pub_lg_2_i, pub_lg_2_d11, + pub_lg_2_d12, pub_lg_2_d21, pub_lg_2_d22, pub_lg_2_d23, pub_lg_2_d24] + edges = [] + nodes, edges, handled_nodes = complete_changed_group_nodes_test(pubs, edges, 'doi_cg_d11', 2, 2, 2, 2) + self.assertCountEqual(nodes, [moved_1_pub_cg_d11, moved_1_pub_cg_d21, moved_1_pub_lg_1_d23, moved_1_pub_cg_i, + moved_1_pub_lg_1_h23, moved_1_pub_cg_h11, moved_1_pub_lg_2_h11]) + self.assertCountEqual(edges, + [['doi_cg_d11', 'doi_lg_1_d23'], ['doi_cg_d11', 'doi_cg_d21'], ['doi_cg_i', 'doi_cg_d11'], + ['doi_lg_1_h23', 'doi_cg_i'], ['doi_cg_h11', 'doi_cg_i'], ['doi_lg_2_h11', 'doi_cg_i']]) + + nodes, edges, handled_nodes = complete_changed_group_nodes_test(pubs, edges, 'doi_lg_2_h11', 2, 2, 3, 3) + self.assertCountEqual(nodes, [moved_2_pub_cg_i, moved_2_pub_cg_d11, moved_2_pub_lg_1_d23, moved_2_pub_cg_d21, + moved_2_pub_cg_d12, moved_2_pub_cg_d22, moved_2_pub_lg_2_h21, + moved_2_pub_lg_2_h22, moved_2_pub_lg_2_h11, moved_2_pub_lg_2_i, + moved_2_pub_lg_2_d11, moved_2_pub_lg_2_d21, moved_2_pub_lg_2_d12, + moved_2_pub_lg_2_d22, moved_2_pub_lg_2_d23, moved_2_pub_lg_2_d24]) + self.assertCountEqual(edges, + [['doi_cg_d11', 'doi_lg_1_d23'], ['doi_cg_d11', 'doi_cg_d21'], ['doi_cg_i', 'doi_cg_d11'], + ['doi_cg_i', 'doi_cg_d12'], ['doi_cg_d12', 'doi_cg_d22'], ['doi_lg_2_h11', 'doi_cg_i'], + ['doi_cg_i', 'doi_lg_2_i'], ['doi_lg_2_h21', 'doi_lg_2_h11'], + ['doi_lg_2_h22', 'doi_lg_2_h11'], ['doi_lg_2_h11', 'doi_lg_2_i'], + ['doi_lg_2_i', 'doi_lg_2_d11'], ['doi_lg_2_d11', 'doi_lg_2_i'], + ['doi_lg_2_d11', 'doi_lg_2_d21'], ['doi_lg_2_i', 'doi_lg_2_d12'], + ['doi_lg_2_d12', 'doi_lg_2_d22'], ['doi_lg_2_d12', 'doi_lg_2_d23'], + ['doi_lg_2_d12', 'doi_lg_2_d24']]) def keep_only_dois(nodes): - ''' + ''' :param nodes: input list of nodes of type Publication :type nodes: List[Publication] - gets nodes of type pub and return only their doi + gets nodes of type pub and return only their DOI ''' - doi_list = [] - for node in nodes: - doi_list.append(node.doi_url) - return doi_list + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/verarbeitung/update_graph/README.md b/verarbeitung/update_graph/README.md index 0193c1e..05dfbc9 100644 --- a/verarbeitung/update_graph/README.md +++ b/verarbeitung/update_graph/README.md @@ -36,6 +36,5 @@ update_depth.py ## Authors - Donna Löding - Alina Molkentin -- Xinyi Tang - Judith Große - Malte Schokolowski \ No newline at end of file diff --git a/verarbeitung/update_graph/compare_old_and_new_node_lists.py b/verarbeitung/update_graph/compare_old_and_new_node_lists.py index f816fea..a899a8a 100644 --- a/verarbeitung/update_graph/compare_old_and_new_node_lists.py +++ b/verarbeitung/update_graph/compare_old_and_new_node_lists.py @@ -1,37 +1,39 @@ #!/usr/bin/env python3 from collections import Counter -def compare_old_and_new_node_lists(old_doi_node_list,new_doi_node_list): + +def compare_old_and_new_node_lists(old_doi_node_list, new_doi_node_list): ''' - :param old_doi_node_list: list of dois from old graph + :param old_doi_node_list: list of DOIs from old graph :type old_doi_node_list: List[String] - :param new_doi_node_list: list of dois from new graph + :param new_doi_node_list: list of DOIs from new graph :type new_doi_node_list: List[String] function to calculate, which nodes from the old graph are deleted and which are added ''' - dois_from_old_graph = old_doi_node_list #WICHTIG: Keine doppelten DOIs + dois_from_old_graph = old_doi_node_list # important: no duplicate DOIs dois_from_new_graph = new_doi_node_list deleted_nodes = [] common_nodes = [] inserted_nodes = [] all_dois = dois_from_old_graph + dois_from_new_graph - for doi in all_dois: # iterates over the merged list of new and old dois - if ((all_dois.count(doi) == 2) & (doi not in common_nodes)): # If the doi occurs twice the node is in the old and the new graph - common_nodes.append(doi) #appends the doi to common ones, if its not alredy in it - elif ((doi in dois_from_old_graph) & (doi not in dois_from_new_graph)): #If the doi occurs once and it is from old graph it is a deleted node - deleted_nodes.append(doi) #appends the doi to deleted ones - elif ((doi in dois_from_new_graph) & (doi not in dois_from_old_graph)): #if the doi occurs ince and it is from new graph it is a inserted node - inserted_nodes.append(doi) #appends the doi to the inserted ones - return(common_nodes, inserted_nodes, deleted_nodes) - - -#Test Prints - #liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"] - #liste_2 = ["doi_1","doi_2","doi_3","doi_6","doi_7"] - #print("gemeinsame Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[0]) - #print("hinzugefügte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[1]) - #print("gelöschte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[2]) + for doi in all_dois: # iterates over the merged list of new and old DOIs + if ((all_dois.count(doi) == 2) & ( + doi not in common_nodes)): # If the DOI occurs twice the node is in the old and the new graph + common_nodes.append(doi) # appends the DOI to common ones, if its not already in it + elif ((doi in dois_from_old_graph) & ( + doi not in dois_from_new_graph)): # If the DOI occurs once and it is from old graph it is a deleted node + deleted_nodes.append(doi) # appends the DOI to deleted ones + elif ((doi in dois_from_new_graph) & ( + doi not in dois_from_old_graph)): # if the DOI occurs ince and it is from new graph it is a inserted node + inserted_nodes.append(doi) # appends the DOI to the inserted ones + return (common_nodes, inserted_nodes, deleted_nodes) +# Test Prints +# liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"] +# liste_2 = ["doi_1","doi_2","doi_3","doi_6","doi_7"] +# print("gemeinsame Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[0]) +# print("hinzugefügte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[1]) +# print("gelöschte Elemente: ",doi_listen_vergleichen(liste_1,liste_2)[2]) diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index 7dcad9b..6beb4a8 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -4,16 +4,17 @@ Functions to update a graph representing citations between multiple ACS/Nature j """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" -import sys +# __copyright__ = "" +# __credits__ = ["", "", "", ""] +# __license__ = "" +# __version__ = "" +# __maintainer__ = "" + +import sys from pathlib import Path from os import error @@ -25,30 +26,32 @@ from verarbeitung.construct_new_graph.initialize_graph import init_graph_constru from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit - def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type): global nodes, edges nodes = test_nodes edges = test_edges - return(find_furthermost_citations(nodes, edges, changed_node, old_search_depth, new_search_depth, cit_type)) + return (find_furthermost_citations(nodes, edges, changed_node, old_search_depth, new_search_depth, cit_type)) -def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height): + +def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height, + new_search_depth, new_search_height): global nodes, edges nodes = test_nodes edges = test_edges - handled_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, True) - return(new_nodes, new_edges, handled_nodes) - + handled_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, + old_search_height, new_search_depth, + new_search_height, True) + return (new_nodes, new_edges, handled_nodes) def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new_search_depth, cit_type): ''' - :param new_nodes: list of nodes which are generated seperately from main node list to avoid recursive problems + :param new_nodes: list of nodes which are generated separately from main node list to avoid recursive problems :type new_nodes List[Publication] - :param new_edges: list of edges which are generated seperately from main edge list to avoid recursive problems + :param new_edges: list of edges which are generated separately from main edge list to avoid recursive problems :type new_edges: List[List[String,String]] :param node: node which is known but not from input group @@ -78,46 +81,47 @@ def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new if cit_type == "Citation": - # to find a cyclus and not change height + # to find a cycle and not change height not_in_citations = True for new_cit_node_citation in new_cit_node.citations: if (cit_node.doi_url == new_cit_node_citation.doi_url): not_in_citations = False break - if (not_in_citations): + if (not_in_citations): new_citations.append(new_cit_node) # change height accordingly and add link to edge new_cit_node.group = node.group + depth - if [cit_node.doi_url,cit_node.doi_url] not in new_edges: - new_edges.append([new_cit_node.doi_url,cit_node.doi_url]) - + if [cit_node.doi_url, cit_node.doi_url] not in new_edges: + new_edges.append([new_cit_node.doi_url, cit_node.doi_url]) + elif cit_type == "Reference": - # to find a cyclus and not change depth + # to find a cycle and not change depth not_in_citations = True for new_cit_node_reference in new_cit_node.references: if (new_cit_node.doi_url == new_cit_node_reference.doi_url): not_in_citations = False break - if (not_in_citations): + if (not_in_citations): new_citations.append(new_cit_node) # change height accordingly and add link to edge new_cit_node.group = node.group + depth if [cit_node.doi_url, new_cit_node.doi_url] not in new_edges: new_edges.append([cit_node.doi_url, new_cit_node.doi_url]) - + citations_saved = new_citations for new_citation in new_citations: if new_citation not in new_nodes: new_nodes.append(new_citation) - # returns the references/citations which needs to be processed to complete contruction - return(citations_saved) + # returns the references/citations which needs to be processed to complete construction + return (citations_saved) -def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var): +def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, + new_search_height, test_var): ''' :param inserted_nodes: list of nodes which are inserted to new input array :type inserted_nodes: List[String] @@ -134,60 +138,64 @@ def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_he :param new_search_height: height to search for citations from new construction call :type new_search_height: int - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean completes the references and citations for nodes which were known in non input group ''' - #changed_group_node_citations = [] - #changed_group_node_references = [] + # changed_group_node_citations = [] + # changed_group_node_references = [] # saves which nodes were handled because they were known before handled_inserted_nodes = [] new_nodes = [] new_edges = [] for node in nodes: - + # moves known reference node to input and completes citations and references for this node if (node.group < 0) and (node.doi_url in inserted_nodes): # get pub from input pub = get_pub(node.doi_url, test_var) if (type(pub) != Publication): - error_doi_list.append(node.doi_url) continue # find old maximum publications and complete tree to new max depth pub.group = node.group - old_max_references = find_furthermost_citations(new_nodes, new_edges, pub, old_search_depth, new_search_depth, "Reference") - add_citations(new_nodes, new_edges, old_max_references, min(old_search_depth - abs(node.group), new_search_depth), new_search_depth, "Reference", test_var) + old_max_references = find_furthermost_citations(new_nodes, new_edges, pub, old_search_depth, + new_search_depth, "Reference") + add_citations(new_nodes, new_edges, old_max_references, + min(old_search_depth - abs(node.group), new_search_depth), new_search_depth, "Reference", + test_var) # add tree for citations add_citations(new_nodes, new_edges, [pub], 0, new_search_height, "Citation", test_var) - + pub.group = 0 - new_nodes.append(pub) + new_nodes.append(pub) handled_inserted_nodes.append(node) - + # moves known citation node to input and completes citations and references for this node elif (node.group > 0) and (node.doi_url in inserted_nodes): # get pub from input pub = get_pub(node.doi_url, test_var) if (type(pub) != Publication): - error_doi_list.append(node.doi_url) continue # find old maximum publications and complete tree to new max depth pub.group = node.group - old_max_citations = find_furthermost_citations(new_nodes, new_edges, pub, old_search_height, new_search_height, "Citation") - add_citations(new_nodes, new_edges, old_max_citations, min(old_search_height - abs(node.group), new_search_height), new_search_height, "Citation", test_var) + old_max_citations = find_furthermost_citations(new_nodes, new_edges, pub, old_search_height, + new_search_height, "Citation") + add_citations(new_nodes, new_edges, old_max_citations, + min(old_search_height - abs(node.group), new_search_height), new_search_height, "Citation", + test_var) # add tree for citations - add_citations(new_nodes, new_edges, [pub], 0, new_search_depth, "Reference", test_var) - + add_citations(new_nodes, new_edges, [pub], 0, new_search_depth, "Reference", test_var) + pub.group = 0 new_nodes.append(pub) handled_inserted_nodes.append(node) @@ -198,10 +206,11 @@ def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_he if new_node.doi_url == inserted_node: new_node.group = 0 - return(handled_inserted_nodes, new_nodes, new_edges) + return (handled_inserted_nodes, new_nodes, new_edges) -def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var = False): +def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height, + new_search_depth, new_search_height, test_var=False): ''' :param input_nodes_list: list of nodes which are processed for new construction call :type input_nodes_list: List[Publication] @@ -224,7 +233,7 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes :param new_search_height: height to search for citations from new construction call :type new_search_height: int - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean completes the references and citations for nodes which were known in non input group @@ -234,17 +243,21 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes edges = input_edges_list.copy() error_doi_list = [] - handled_inserted_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var) + handled_inserted_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_nodes, old_search_depth, + old_search_height, new_search_depth, + new_search_height, test_var) # copy all nodes from inserted_nodes to new node, if node is not in handled_inserted_nodes not_handled_inserted_nodes = [node for node in inserted_nodes if node not in handled_inserted_nodes] - # function call to begin recursive processing up to max depth/height for unhandled nodes if len(not_handled_inserted_nodes) > 0: - new_nodes, new_edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = new_nodes, input_edges = new_edges) + new_nodes, new_edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes, new_search_depth, + new_search_height, test_var=test_var, + update_var=True, input_nodes=new_nodes, + input_edges=new_edges) for err_node in error_doi_list_new: if err_node not in error_doi_list: error_doi_list.append(err_node) - return(new_nodes, new_edges, error_doi_list) \ No newline at end of file + return (new_nodes, new_edges, error_doi_list) diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py index 08d0dbb..b7af110 100644 --- a/verarbeitung/update_graph/delete_nodes_edges.py +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -4,7 +4,7 @@ Functions to remove publications/links from nodes/edges list, if they can no lon """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" #__copyright__ = "" @@ -13,7 +13,7 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" -import sys +import sys from pathlib import Path sys.path.append("../../") @@ -34,7 +34,6 @@ def search_ref_cit_graph_rec_test(pubs, new_test_input, old_max_depth, cit_var): return usable_nodes - def search_ref_graph_rec(pub, curr_depth, old_max_depth): ''' :param pub: pub go get appended to usable_nodes @@ -61,11 +60,11 @@ def search_ref_graph_rec(pub, curr_depth, old_max_depth): if (reference.doi_url == citation.doi_url and citation.doi_url not in usable_doi_nodes): not_in_citations = False break - if not_in_citations and curr_depth < old_max_depth: + if not_in_citations and curr_depth < old_max_depth: search_ref_graph_rec(ref_pub, curr_depth + 1, old_max_depth) -def search_cit_graph_rec(pub, curr_height, old_max_height): +def search_cit_graph_rec(pub, curr_height, old_max_height): ''' :param pub: pub go get appended to usable_nodes :type pub: Publication @@ -77,7 +76,7 @@ def search_cit_graph_rec(pub, curr_height, old_max_height): :type old_max_height: int function that appends nodes of group "citation" to list usable_nodes, if they are reachable from input nodes - ''' + ''' usable_doi_nodes = [] for citation in pub.citations: @@ -87,13 +86,13 @@ def search_cit_graph_rec(pub, curr_height, old_max_height): usable_nodes.append(cit_pub) usable_doi_nodes.append(cit_pub.doi_url) - # to find a cyclus and avoid recursion error + # to find a cycle and avoid recursion error not_in_references = True for reference in pub.references: if (citation.doi_url == reference.doi_url and reference.doi_url not in usable_doi_nodes): not_in_references = False break - if not_in_references and curr_height < old_max_height: + if not_in_references and curr_height < old_max_height: search_cit_graph_rec(cit_pub,curr_height + 1, old_max_height) @@ -103,7 +102,7 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list, old_depth, :param input_list: list of publications to get reduced :type input_list: List[Publication] - :param common_nodes: list of input dois which are in old and new input call + :param common_nodes: list of input DOIs which are in old and new input call :type common_nodes: List[String] :param old_edges_list: list of links between publications from old call diff --git a/verarbeitung/update_graph/import_from_json.py b/verarbeitung/update_graph/import_from_json.py index 72217d5..36b4778 100644 --- a/verarbeitung/update_graph/import_from_json.py +++ b/verarbeitung/update_graph/import_from_json.py @@ -4,7 +4,7 @@ Functions to read old json files to recreate old graph structure """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" #__copyright__ = "" @@ -13,7 +13,7 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" - + import json import sys sys.path.append("../") @@ -21,27 +21,27 @@ sys.path.append("../") from input.publication import Publication, Citation - def create_pubs_from_json(input_dict): ''' - :param input_dict: dictionary read from old graph Json File + :param input_dict: dictionary read from old graph json file :type json_file: dictionary creates list of publication retrieved from old json file ''' - - #iterates over the list of nodes - for node in input_dict["nodes"]: - #creates for the nodes the objects class Publication - + + # iterates over the list of nodes + for node in input_dict["nodes"]: + # creates for the nodes the objects class Publication + pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], []) pub.group = node["depth"] - #appends the objects to a list - list_of_nodes_py.append(pub) + # appends the objects to a list + list_of_nodes_py.append(pub) + def add_ref_and_cit_to_pubs(input_dict): ''' - :param input_dict: dictionary read from old graph Json File + :param input_dict: dictionary read from old graph json file :type json_file: dictionary adds references and citations to retrieved publication list @@ -52,21 +52,21 @@ def add_ref_and_cit_to_pubs(input_dict): for source in list_of_nodes_py: for target in list_of_nodes_py: - # when correct dois found, adds then as references/citatons to publication list + # when correct dois found, adds then as references/citations to publication list if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])): new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference") source.references.append(new_reference) new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation") target.citations.append(new_citation) - + # adds edge to list list_of_edges_py.append([edge["source"],edge["target"]]) def input_from_json(json_file): ''' - :param json_file: Json-Datei for the old graph + :param json_file: Json file for the old graph :type json_file: String retrieves information from old json file to be reused for new graph construction @@ -77,8 +77,8 @@ def input_from_json(json_file): list_of_nodes_py = [] list_of_edges_py = [] - #opens the json file and saves content in dictionary - with open(json_file,'r') as file: + # opens the json file and saves content in dictionary + with open(json_file,'r') as file: input_dict = json.load(file) # creates nodes of Class Publication from input Json file @@ -89,6 +89,5 @@ def input_from_json(json_file): old_depth_height = input_dict["depth_height"] old_depth = old_depth_height[0] old_height = old_depth_height[1] - return(list_of_nodes_py, list_of_edges_py, old_depth, old_height) \ No newline at end of file diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py index f843bb5..27607fc 100644 --- a/verarbeitung/update_graph/update_depth.py +++ b/verarbeitung/update_graph/update_depth.py @@ -4,7 +4,7 @@ Functions to update the citation depth of recursive graph construction """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" #__copyright__ = "" @@ -45,6 +45,7 @@ def reduce_max_height_depth_test(pubs, max_dh, dh_var): reduce_max_depth(max_dh) return processed_input_list + def get_old_max_references_citations_test(pubs, old_dh, dh_var): ''' :param pubs: list of publication to reduce height/depth in @@ -66,6 +67,7 @@ def get_old_max_references_citations_test(pubs, old_dh, dh_var): else: return(get_old_max_references(old_dh, True)) + def reduce_max_height(max_height): ''' :param max_height: new maximum height to reduce publications in publication list to @@ -79,6 +81,7 @@ def reduce_max_height(max_height): if (pub.group > max_height): processed_input_list.remove(pub) + def reduce_max_depth(max_depth): ''' :param max_depth: new maximum depth to reduce publications in publication list to @@ -110,6 +113,7 @@ def get_old_max_references(old_depth, test_var): old_max_references.append(pub) return(old_max_references) + def get_old_max_citations(old_height, test_var): ''' :param old_height: old maximum height to search for citations @@ -122,11 +126,11 @@ def get_old_max_citations(old_height, test_var): if (pub.group == old_height): pub = get_pub(pub.doi_url, test_var) if (type(pub) != Publication): - #print(pub) continue old_max_citations.append(pub) return(old_max_citations) + def update_depth(obj_input_list, input_edges, new_depth, new_height, old_depth, old_height, test_var): ''' :param obj_input_list: input list of publications of type Publication from update_graph @@ -141,7 +145,7 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, old_depth, :param new_height: new maximum height to search for citations :type new_height: int - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean function to adjust old publication search depth to update call @@ -163,13 +167,9 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, old_depth, elif (old_height < new_height): old_max_citations = get_old_max_citations(old_height, test_var) add_citations(processed_input_list, valid_edges, old_max_citations, old_height, new_height, "Citation", test_var) - - - - + back_to_valid_edges(valid_edges, processed_input_list) - # adds edges between reference group and citation group of known publications diff --git a/verarbeitung/update_graph/update_edges.py b/verarbeitung/update_graph/update_edges.py index 1b046a1..45a7bde 100644 --- a/verarbeitung/update_graph/update_edges.py +++ b/verarbeitung/update_graph/update_edges.py @@ -12,8 +12,7 @@ def back_to_valid_edges(links_from_json, processed_input_list): ''' list_of_valid_edges = links_from_json.copy() - - #iterates over all edges from old graph + # iterates over all edges from old graph for edge in list_of_valid_edges: # counter for adjacent nodes @@ -27,6 +26,6 @@ def back_to_valid_edges(links_from_json, processed_input_list): if (found_adj_nodes == 2): break - #removes the edge if less than 2 adjacent nodes found + # removes the edge if less than 2 adjacent nodes found if (found_adj_nodes < 2): links_from_json.remove(edge) \ No newline at end of file diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 8143ab4..2bb93c3 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -4,7 +4,7 @@ Functions to update a graph representing citations between multiple ACS/Nature j """ -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski" __email__ = "cis-project2021@zbh.uni-hamburg.de" __status__ = "Production" #__copyright__ = "" @@ -34,7 +34,7 @@ def get_old_input_dois(old_obj_input_list): :param old_obj_input_list: list of publications retrieved from old json file :type old_obj_input_list: List[Publication] - function to return pub dois for old publications of group input retrieved from json file + function to return pub DOIs for old publications of group input retrieved from json file ''' # new list to save doi_url for each old publication of group input @@ -44,15 +44,16 @@ def get_old_input_dois(old_obj_input_list): old_input_dois.append(pub.doi_url) return old_input_dois + def get_new_input_dois(new_input, test_var): ''' - :param new_input: input list of doi from UI + :param new_input: input list of DOI from UI :type new_input: list of strings - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean - function to return pub dois for input urls + function to return pub DOIs for input urls ''' # new list to save doi_url for each new input url @@ -61,7 +62,6 @@ def get_new_input_dois(new_input, test_var): # retrieves information and adds to new list if successful pub = get_pub(new_node, test_var) if (type(pub) != Publication): - #print(pub) error_doi_list.append(new_node) continue @@ -80,13 +80,13 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes :param old_edges_list: list of links between publications retrieved from old json file :type old_edges_list: List[List[String,String]] - :param test_var: variable to differenciate between test and url call + :param test_var: variable to differentiate between test and url call :type test_var: boolean function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges ''' - # gets information from previous cunstruction call + # gets information from previous construction call old_obj_input_list , old_edges_list, old_search_depth, old_search_height = input_from_json(json_file) # one global list to save the process of removing unneeded publications and one to save valid edges @@ -95,16 +95,13 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes valid_edges = old_edges_list error_doi_list = [] - - # get dois from lists to compare for differences + # get DOIs from lists to compare for differences old_doi_input_list = get_old_input_dois(old_obj_input_list) new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var) # retrieve which publications are already known, removed, inserted common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list) - - processed_list_copy = processed_list.copy() valid_edges_copy = valid_edges.copy() @@ -113,8 +110,7 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges, old_search_depth, old_search_height) - - + if (len(inserted_nodes) > 0): inserted_pub_nodes, inserted_edges, error_doi_list_new = connect_old_and_new_input(processed_list_copy, valid_edges_copy, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var) for err_node in error_doi_list_new: -- GitLab