From f6a285d6fc27a1451bff1c7d92a114df5d0cacbd Mon Sep 17 00:00:00 2001
From: Malte Schokolowski <baw8441@uni-hamburg.de>
Date: Mon, 10 Jan 2022 15:12:26 +0100
Subject: [PATCH] Debugging und Testwriting mit Donna und Schoko

---
 .gitignore                                    | 130 +++++++++++++++++-
 verarbeitung/test/update_graph_unittest.py    | 116 +++++++++++++++-
 .../update_graph/connect_new_input.py         |  18 ++-
 .../update_graph/delete_nodes_edges.py        |  15 ++
 verarbeitung/update_graph/update_graph.py     |   9 +-
 5 files changed, 275 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 61f2dc9..df2eaf4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,129 @@
-**/__pycache__/
+# Editors
+.vscode/
+.VSCodeCounter/
+.idea/
+
+# Vagrant
+.vagrant/
+
+# Mac/OSX
+.DS_Store
+
+# Windows
+Thumbs.db
+
+# Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# JSON files (NOTE: this pattern matches every *.json in the repository, not only generated output)
+*.json
\ No newline at end of file
diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py
index 59de002..765fcbf 100644
--- a/verarbeitung/test/update_graph_unittest.py
+++ b/verarbeitung/test/update_graph_unittest.py
@@ -3,6 +3,7 @@ import unittest
 import sys  
 from pathlib import Path
 
+
 sys.path.append("../")
 
 from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
@@ -10,18 +11,15 @@ from verarbeitung.construct_new_graph.export_to_json import output_to_json
 from verarbeitung.update_graph.import_from_json import input_from_json
 from verarbeitung.update_graph.update_graph import update_graph, get_old_input_dois, get_new_input_dois
 from verarbeitung.update_graph.update_depth import reduce_max_height_depth_test, get_old_max_references_citations_test
+from verarbeitung.update_graph.update_edges import back_to_valid_edges
+from verarbeitung.update_graph.delete_nodes_edges import search_ref_cit_graph_rec_test
+from verarbeitung.update_graph.compare_old_and_new_node_lists import compare_old_and_new_node_lists
+from verarbeitung.update_graph.connect_new_input import find_furthermost_citations_test
 from verarbeitung.get_pub_from_input import input_test_func
 
 class UpdatingTest(unittest.TestCase):
      maxDiff = None
 
-     def test_import_from_json(self):
-          nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True)
-          output_to_json(nodes_old, edges_old, test_var = True)
-          nodes_new, edges_new = input_from_json('test_output.json')
-          self.assertCountEqual(nodes_old,nodes_new)
-          self.assertCountEqual(edges_old, edges_new)
-
      def test_deleted_input_dois(self):
           nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True)
           nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
@@ -135,9 +133,113 @@ class UpdatingTest(unittest.TestCase):
           pub_lg_2_d_21.group = -2
           pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_h_21, pub_lg_2_h_22,pub_lg_2_d_11, pub_lg_2_d_21]
           self.assertCountEqual(get_old_max_references_citations_test(pubs, 2, "Height"), [pub_lg_2_h_21,pub_lg_2_h_22])
+
+     # import_from_json.py:
+
+     def test_input_from_json(self):  # round trip: build graph -> export to JSON -> import must give equal nodes and edges
+          nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True)
+          output_to_json(nodes_old, edges_old, test_var = True)  # presumably writes test_output.json, read back below - confirm
+          nodes_new, edges_new = input_from_json('test_output.json')
+          self.assertCountEqual(nodes_old,nodes_new)
+          self.assertCountEqual(edges_old, edges_new)
+
+     # update_edges.py:
+
+     def test_back_to_valid_edges(self):  # edges touching publications that are not in pubs must be removed
+          pub_lg_2_i = input_test_func('doi_lg_2_i')
+          pub_lg_2_i.group = 0
+          pub_lg_2_h_11 = input_test_func('doi_lg_2_h11')
+          pub_lg_2_h_11.group = 1
+          pub_lg_2_d_11 = input_test_func('doi_lg_2_d11')
+          pub_lg_2_d_11.group = -1
+          pubs = [pub_lg_2_i, pub_lg_2_h_11, pub_lg_2_d_11]
+          edges = [['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11'],['doi_lg_2_h21','doi_lg_2_h11'],['doi_lg_2_i','doi_lg_2_d21']]  # last two edges reference doi_lg_2_h21 / doi_lg_2_d21, which are not in pubs
+          back_to_valid_edges(edges, pubs)  # return value unused: the assertion below relies on edges being changed in place
+          self.assertCountEqual([['doi_lg_2_h11','doi_lg_2_i'],['doi_lg_2_i','doi_lg_2_d11']],edges)
+
+     # delete_nodes_edges.py:
+
+     def test_search_ref_graph_rec(self):  # NOTE: despite the name, the helper call below runs the "Citation" search
+          pub_lg_2_i = input_test_func('doi_lg_2_i')
+          pub_lg_2_i.group = 0
+          pub_lg_2_h11 = input_test_func('doi_lg_2_h11')
+          pub_lg_2_h11.group = 1
+          pub_lg_2_h12 = input_test_func('doi_lg_2_h12')
+          pub_lg_2_h12.group = 1
+          pub_lg_2_d11 = input_test_func('doi_lg_2_d11')
+          pub_lg_2_d11.group = -1
+          pub_lg_2_d12 = input_test_func('doi_lg_2_d12')
+          pub_lg_2_d12.group = -1
+          pub_lg_2_h21 = input_test_func('doi_lg_2_h21')
+          pub_lg_2_h21.group = 2
+          pub_lg_2_h22 = input_test_func('doi_lg_2_h22')
+          pub_lg_2_h22.group = 2
+          pub_lg_2_d21 = input_test_func('doi_lg_2_d21')
+          pub_lg_2_d21.group = -2
+
+          pub_cg_i = input_test_func('doi_cg_i')
+          pub_cg_i.group = 0
+          pub_cg_h11 = input_test_func('doi_cg_h11')
+          pub_cg_h11.group = 1
+          pub_cg_d11 = input_test_func('doi_cg_d11')
+          pub_cg_d11.group = -1
+          pub_cg_d12 = input_test_func('doi_cg_d12')
+          pub_cg_d12.group = -1
+          pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_cg_i, pub_cg_d11, pub_cg_d12, pub_cg_h11]
+          usable_nodes = search_ref_cit_graph_rec_test(pubs, [pub_cg_i], "Citation")  # pub_cg_i is the only start node of the search
+          self.assertCountEqual(usable_nodes, [pub_cg_h11, pub_lg_2_h11, pub_lg_2_h21, pub_lg_2_h22])
+
+     # compare_old_and_new_node_lists.py:
+
+     def test_compare_old_and_new_nodes(self):  # splits two DOI input lists into common / inserted / deleted entries
+          old_input = ['doi_lg_1_i', 'doi_lg_2_i']
+          new_input = ['doi_lg_1_i', 'doi_cg_i']
+          common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_input, new_input)
+          self.assertCountEqual(common_nodes, ['doi_lg_1_i'])  # present in both lists
+          self.assertCountEqual(inserted_nodes, ['doi_cg_i'])  # only in new_input
+          self.assertCountEqual(deleted_nodes, ['doi_lg_2_i'])  # only in old_input
+
+
+     # connect_new_input.py:
+
+     def test_find_furthermost_citations(self):  # checks both "Citation" and "Reference" mode for old search depth 1 and 2
+          pub_lg_2_i = input_test_func('doi_lg_2_i')
+          pub_lg_2_i.group = 0
+          pub_lg_2_h11 = input_test_func('doi_lg_2_h11')
+          pub_lg_2_h11.group = 1
+          pub_lg_2_h12 = input_test_func('doi_lg_2_h12')
+          pub_lg_2_h12.group = 1
+          pub_lg_2_d11 = input_test_func('doi_lg_2_d11')
+          pub_lg_2_d11.group = -1
+          pub_lg_2_d12 = input_test_func('doi_lg_2_d12')
+          pub_lg_2_d12.group = -1
+          pub_lg_2_h21 = input_test_func('doi_lg_2_h21')
+          pub_lg_2_h21.group = 2
+          pub_lg_2_h22 = input_test_func('doi_lg_2_h22')
+          pub_lg_2_h22.group = 2
+          pub_lg_2_d21 = input_test_func('doi_lg_2_d21')
+          pub_lg_2_d21.group = -2
+          pub_lg_2_d22 = input_test_func('doi_lg_2_d22')
+          pub_lg_2_d22.group = -2
+          pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_lg_2_d22]
           
+          self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_h11, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22])  # start node has group 1, old depth 2: expects the group-2 nodes
+          self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_h11, 1, "Citation"),[pub_lg_2_h11])  # old depth equals abs(group): expects the start node itself
+
+          self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_d11, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i])
+          self.assertCountEqual(find_furthermost_citations_test(pubs, pub_lg_2_d11, 1, "Reference"),[pub_lg_2_d11])  # old depth equals abs(group): expects the start node itself
 
 
+     def test_complete_changed_group_nodes(self):  # NOTE(review): looks unfinished - fixtures are built but nothing visible here asserts
+          pub_cg_i = input_test_func('doi_cg_i')
+          pub_cg_i.group = 0
+          pub_cg_h11 = input_test_func('doi_cg_h11')
+          pub_cg_h11.group = 1
+          pub_cg_h21 = input_test_func('doi_cg_h21')
+          pub_cg_h21.group = 2
+          pub_cg_h22 = input_test_func('doi_cg_h22')
+          pub_cg_h22.group = 2
+          pubs = []  # TODO: fill with the fixtures above and assert on complete_changed_group_nodes_test
 
 
           
diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py
index 5acb05f..d9cbf82 100644
--- a/verarbeitung/update_graph/connect_new_input.py
+++ b/verarbeitung/update_graph/connect_new_input.py
@@ -23,6 +23,21 @@ from verarbeitung.construct_new_graph.initialize_graph import init_graph_constru
 from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
 
 
+def find_furthermost_citations_test(test_nodes, changed_node, old_search_depth, cit_type):
+    '''test hook: sets the module-level nodes list to test_nodes, then returns find_furthermost_citations(changed_node, old_search_depth, cit_type)'''
+    global nodes 
+    nodes = test_nodes
+    return(find_furthermost_citations(changed_node, old_search_depth, cit_type))
+
+def complete_changed_group_nodes_test(test_nodes, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height):
+    '''test hook: sets the module-level nodes list to test_nodes, runs complete_changed_group_nodes with test_var=True and returns (nodes, handled_nodes)'''
+    global nodes
+    nodes = test_nodes
+
+    handled_nodes = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, True)
+    return(nodes, handled_nodes)
+
+
 def find_furthermost_citations(node, old_search_depth, cit_type):
     '''
         :param node:                node which is known but not from input group
@@ -36,7 +51,7 @@ def find_furthermost_citations(node, old_search_depth, cit_type):
 
         function to find the furthermost citation/reference for given node which is from the same group
     '''
-    citations_saved = get_cit_type_list(node, cit_type)
+    citations_saved = [node]
 
     # group of node and old search depth/height determines how often the loop needs to be repeated
     for depth in range(old_search_depth - abs(node.group)):
@@ -54,6 +69,7 @@ def find_furthermost_citations(node, old_search_depth, cit_type):
     # returns the references/citations which needs to be processed to complete contruction
     return(citations_saved)
 
+
 def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var):
     '''
         :param inserted_nodes:      list of nodes which are inserted to new input array  
diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py
index 6960f3e..205c82e 100644
--- a/verarbeitung/update_graph/delete_nodes_edges.py
+++ b/verarbeitung/update_graph/delete_nodes_edges.py
@@ -20,6 +20,21 @@ sys.path.append("../../")
 from .update_edges import back_to_valid_edges
 
 
+def search_ref_cit_graph_rec_test(pubs, new_test_input, cit_var):
+    '''test hook: resets the module-level usable_nodes list, sets input_obj_list to pubs, runs the search chosen by cit_var on each pub of new_test_input and returns usable_nodes'''
+    global usable_nodes, input_obj_list
+    usable_nodes = []
+    input_obj_list = pubs
+
+    if cit_var == "Reference":
+        for pub in new_test_input:
+            search_ref_graph_rec(pub)
+    elif cit_var == "Citation":
+        for pub in new_test_input:
+            search_cit_graph_rec(pub)
+    return usable_nodes  # any other cit_var runs no search, so the list is returned empty
+
+
 def search_ref_graph_rec(pub):
     '''
     :param pub: pub go get appended to usable_nodes
diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py
index 2aaf214..b7b89ab 100644
--- a/verarbeitung/update_graph/update_graph.py
+++ b/verarbeitung/update_graph/update_graph.py
@@ -101,15 +101,16 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
     # retrieve which publications are already known, removed, inserted
     common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list)
 
-    # deletes publications and edges from node_list if publications can no longer be reached
-    if (len(deleted_nodes) > 0):
-        processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges)
-    
     old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
     
     if (len(inserted_nodes) > 0):
         connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var)
 
+    # deletes publications and edges from node_list if publications can no longer be reached
+    if (len(deleted_nodes) > 0):
+        processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges)
+
+
     if test_var:
         return(processed_list, valid_edges)
     else:
-- 
GitLab