diff --git a/.gitignore b/.gitignore index 61f2dc9f84d472c32fa57194620d6b1e5fa14649..df2eaf4579d8730d30a9b69b2e0568794c12e27f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,129 @@ -**/__pycache__/ +# Editors +.vscode/ +.VSCodeCounter/ +.idea/ + +# Vagrant +.vagrant/ + +# Mac/OSX +.DS_Store + +# Windows +Thumbs.db + +# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# json +*.json \ No newline at end of file diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py index 59de002b8d4efdb87f0d57d80bc1f6f6d7889b49..765fcbf6d2fd2679a62901264a70526ed44deb77 100644 --- a/verarbeitung/test/update_graph_unittest.py +++ b/verarbeitung/test/update_graph_unittest.py @@ -3,6 +3,7 @@ import unittest import sys from pathlib import Path + sys.path.append("../") from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction @@ -10,18 +11,15 @@ from verarbeitung.construct_new_graph.export_to_json import output_to_json from verarbeitung.update_graph.import_from_json import input_from_json from verarbeitung.update_graph.update_graph import update_graph, get_old_input_dois, get_new_input_dois from verarbeitung.update_graph.update_depth import reduce_max_height_depth_test, get_old_max_references_citations_test +from verarbeitung.update_graph.update_edges import back_to_valid_edges +from verarbeitung.update_graph.delete_nodes_edges import search_ref_cit_graph_rec_test +from verarbeitung.update_graph.compare_old_and_new_node_lists import compare_old_and_new_node_lists +from verarbeitung.update_graph.connect_new_input import find_furthermost_citations_test from verarbeitung.get_pub_from_input import input_test_func class UpdatingTest(unittest.TestCase): maxDiff = None - def test_import_from_json(self): - nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True) - output_to_json(nodes_old, edges_old, test_var = True) - nodes_new, edges_new = input_from_json('test_output.json') - self.assertCountEqual(nodes_old,nodes_new) - self.assertCountEqual(edges_old, edges_new) - def test_deleted_input_dois(self): nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True) nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) @@ -135,9 +133,113 @@ class UpdatingTest(unittest.TestCase): pub_lg_2_d_21.group = -2 pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_h_22,pub_lg_2_d_11, pub_lg_2_d_21] self.assertCountEqual(get_old_max_references_citations_test(pubs, 2, "Height"), [pub_lg_2_h_21,pub_lg_2_h_22]) + + # import_from_json.py: + + def test_input_from_json(self): + nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True) + output_to_json(nodes_old, edges_old, test_var = True) + nodes_new, edges_new = input_from_json('test_output.json') + self.assertCountEqual(nodes_old,nodes_new) + self.assertCountEqual(edges_old, edges_new) + + # update_edges.py: + + def test_back_to_valid_edges(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h_11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h_11.group = 1 + pub_lg_2_d_11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d_11.group = -1 + pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_d_11] + edges = [['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11'],['doi_lg_2_h21','doi_lg_2_h11'],['doi_lg_2_i','doi_lg_2_d21']] + back_to_valid_edges(edges, pubs) + self.assertCountEqual([['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11']],edges) + + # delete_nodes_edges.py: + + def test_search_ref_graph_rec(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h11.group = 1 + pub_lg_2_h12 = input_test_func('doi_lg_2_h12') + pub_lg_2_h12.group = 1 + pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d11.group = -1 + pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + pub_lg_2_d12.group = -1 + pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h21.group = 2 + pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h22.group = 2 + pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d21.group = -2 + + pub_cg_i = input_test_func('doi_cg_i') + pub_cg_i.group = 0 + pub_cg_h11 = input_test_func('doi_cg_h11') + pub_cg_h11.group = 1 + pub_cg_d12 = input_test_func('doi_cg_d11') + pub_cg_d12.group = -1 + pub_cg_d11 = input_test_func('doi_cg_d12') + pub_cg_d11.group = -1 + pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_cg_i, pub_cg_d11, pub_cg_d12, pub_cg_h11] + usable_nodes = search_ref_cit_graph_rec_test(pubs, [pub_cg_i], "Citation") + self.assertCountEqual(usable_nodes, [pub_cg_h11, pub_lg_2_h11, pub_lg_2_h21, pub_lg_2_h22]) + + # compare_old_and_new_node_lists.py: + + def test_compare_old_and_new_nodes(self): + old_input = ['doi_lg_1_i', 'doi_lg_2_i'] + new_input = ['doi_lg_1_i', 'doi_cg_i'] + common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_input, new_input) + self.assertCountEqual(common_nodes, ['doi_lg_1_i']) + self.assertCountEqual(inserted_nodes, ['doi_cg_i']) + self.assertCountEqual(deleted_nodes, ['doi_lg_2_i']) + + + # connect_new_input.py: + + def test_find_furthermost_citations(self): + pub_lg_2_i = input_test_func('doi_lg_2_i') + pub_lg_2_i.group = 0 + pub_lg_2_h11 = input_test_func('doi_lg_2_h11') + pub_lg_2_h11.group = 1 + pub_lg_2_h12 = input_test_func('doi_lg_2_h12') + pub_lg_2_h12.group = 1 + pub_lg_2_d11 = input_test_func('doi_lg_2_d11') + pub_lg_2_d11.group = -1 + pub_lg_2_d12 = input_test_func('doi_lg_2_d12') + pub_lg_2_d12.group = -1 + pub_lg_2_h21 = input_test_func('doi_lg_2_h21') + pub_lg_2_h21.group = 2 + pub_lg_2_h22 = input_test_func('doi_lg_2_h22') + pub_lg_2_h22.group = 2 + pub_lg_2_d21 = input_test_func('doi_lg_2_d21') + pub_lg_2_d21.group = -2 + pub_lg_2_d22 = input_test_func('doi_lg_2_d22') + pub_lg_2_d22.group = -2 + pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_lg_2_d22] + self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_h11, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) + self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_h11, 1, "Citation"),[pub_lg_2_h11]) + + self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_d11, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) + self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_d11, 1, "Reference"),[pub_lg_2_d11]) + def test_complete_changed_group_nodes(self): + pub_cg_i = input_test_func('doi_cg_i') + pub_cg_i.group = 0 + pub_cg_h11 = input_test_func('doi_cg_h11') + pub_cg_h11.group = 1 + pub_cg_h21 = input_test_func('doi_cg_h21') + pub_cg_h21.group = 2 + pub_cg_h22 = input_test_func('doi_cg_h22') + pub_cg_h22.group = 2 + pubs = [] diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index 5acb05f81f34277231d8b3447d0b89cc34b2f11e..d9cbf82374b49a8e07a9c5e94168a44575a3d18a 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -23,6 +23,21 @@ from verarbeitung.construct_new_graph.initialize_graph import init_graph_constru from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit +def find_furthermost_citations_test(test_nodes, changed_node, old_search_depth, cit_type): + global nodes + nodes = test_nodes + + return(find_furthermost_citations(changed_node, old_search_depth, cit_type)) + +def complete_changed_group_nodes_test(test_nodes, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height): + global nodes + nodes = test_nodes + + handled_nodes = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, True) + return(nodes, handled_nodes) + + + def find_furthermost_citations(node, old_search_depth, cit_type): ''' :param node: node which is known but not from input group @@ -36,7 +51,7 @@ def find_furthermost_citations(node, old_search_depth, cit_type): function to find the furthermost citation/reference for given node which is from the same group ''' - citations_saved = get_cit_type_list(node, cit_type) + citations_saved = [node] # group of node and old search depth/height determines how often the loop needs to be repeated for depth in range(old_search_depth - abs(node.group)): @@ -54,6 +69,7 @@ def find_furthermost_citations(node, old_search_depth, cit_type): # returns the references/citations which needs to be processed to complete contruction return(citations_saved) + def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var): ''' :param inserted_nodes: list of nodes which are inserted to new input array diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py index 6960f3e32655142988cddf1e9f76f9549dbc8498..205c82e0c814a0901a47011f3ee9e826ae31a8ab 100644 --- a/verarbeitung/update_graph/delete_nodes_edges.py +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -20,6 +20,21 @@ sys.path.append("../../") from .update_edges import back_to_valid_edges +def search_ref_cit_graph_rec_test(pubs, new_test_input, cit_var): + global usable_nodes, input_obj_list + usable_nodes = [] + input_obj_list = pubs + + if cit_var == "Reference": + for pub in new_test_input: + search_ref_graph_rec(pub) + elif cit_var == "Citation": + for pub in new_test_input: + search_cit_graph_rec(pub) + return usable_nodes + + + def search_ref_graph_rec(pub): ''' :param pub: pub go get appended to usable_nodes diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 2aaf2140dcb6daf69a30f47acfdbd079a6e41374..b7b89ab9908b6493076f9b65f422896a6364a0ab 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -101,15 +101,16 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes # retrieve which publications are already known, removed, inserted common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list) - # deletes publications and edges from node_list if publications can no longer be reached - if (len(deleted_nodes) > 0): - processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges) - old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var) if (len(inserted_nodes) > 0): connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var) + # deletes publications and edges from node_list if publications can no longer be reached + if (len(deleted_nodes) > 0): + processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges) + + if test_var: return(processed_list, valid_edges) else: