Commit 25ac97fc authored by Malte Schokolowski

Merge branch 'complete_graph'

parents 004d7d8a c0727d90
Showing changes with 292 additions and 101 deletions
......@@ -8,4 +8,5 @@ doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
Processing(doi_list, 3, 3, 'test.json')
\ No newline at end of file
doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
Processing(doi_list, 3, 2, 'test.json')
......@@ -32,10 +32,12 @@ def get_cit_type_list(pub, cit_type):
function to return the citation or reference list of a publication, depending on cit_type
'''
if (cit_type == "Citation"):
if cit_type == "Citation":
return(pub.citations)
else:
elif cit_type == "Reference":
return(pub.references)
else:
return(ValueError)
def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
'''
......
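To illustrate the revised contract of get_cit_type_list, a minimal sketch (not part of the commit) that mirrors the unit test further below; input_test_func is the test helper from verarbeitung.test.input_test and 'doi_lg_1_h12' one of its test dois:

pub = input_test_func('doi_lg_1_h12')                     # test publication with known citations and references
assert get_cit_type_list(pub, "Citation") == pub.citations
assert get_cit_type_list(pub, "Reference") == pub.references
assert get_cit_type_list(pub, "Hallo") == ValueError      # unknown type: the ValueError class is returned, not raised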
......@@ -78,7 +78,7 @@ def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False)
list_of_edge_dicts = format_edges(edges)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
if (test_var and json_file == 'json_text.json'):
if (test_var):
with open('test_output.json','w') as outfile:
json.dump(dict_of_all, outfile)
else:
......
......@@ -15,6 +15,7 @@ __status__ = "Production"
import sys
import gc
from pathlib import Path
from os import error
sys.path.append("../")
......@@ -24,6 +25,43 @@ from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit
def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
for unit test purposes only
'''
global nodes, edges
nodes = []
edges = []
return(initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var))
def complete_inner_edges_test(test_nodes, test_edges):
'''
:param test_nodes: list of publications from unit test
:type test_nodes: List[Publication]
:param test_edges: list of links from unit test
:type test_edges: List[List[String,String]]
for unit test purposes only
'''
global nodes, edges
nodes = test_nodes
edges = test_edges
complete_inner_edges()
return(nodes, edges)
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
'''
......@@ -54,6 +92,7 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
# checks if publication already exists in nodes
not_in_nodes = True #boolean value to check if a node already exists in the set of nodes
for node in nodes: #iterates over every node in the set of nodes
if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set
not_in_nodes = False #false --> node will not be created
......@@ -96,7 +135,7 @@ def complete_inner_edges():
edges.append([node.doi_url,reference.doi_url])
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False):
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, update_var = False, input_nodes = [], input_edges = []):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
......@@ -110,6 +149,15 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
:param update_var: variable to check if call is from update_graph with known nodes and edges or a fresh construction
:type update_var: boolean
:param input_nodes: list of publications from update_graph
:type input_nodes: List[Publication]
:param input_edges: list of links from update_graph
:type input_edges: List[List[String,String]]
main function to start graph generation
'''
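A minimal usage sketch of the extended signature (not part of the commit; doi values and depth/height are placeholders taken from the tests):

# fresh construction: the global nodes and edges lists start out empty
nodes, edges = init_graph_construction(['doi_lg_1_i'], 2, 2, test_var=True)

# continuation from update_graph: reuse the already known nodes and edges
# instead of starting from empty lists
new_nodes, new_edges = init_graph_construction(['doi_lg_2_i'], 2, 2, test_var=True,
                                               update_var=True, input_nodes=nodes, input_edges=edges)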
......@@ -128,6 +176,10 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va
# initializes the global lists for nodes and edges; reuses the known lists when called from update_graph
global nodes, edges
if update_var:
nodes = input_nodes
edges = input_edges
else:
nodes = []
edges = []
......@@ -141,7 +193,11 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va
# adds edges between reference group and citation group of known publications
complete_inner_edges()
# calls a script to save the nodes and edges of the graph in a .json file
#output_to_json(nodes, edges, test_var)
# garbage collection to delete the global nodes and edges lists; needed because Python keeps them alive after the function ends until the next call
new_nodes = nodes.copy()
new_edges = edges.copy()
del nodes
del edges
gc.collect()
return(nodes,edges)
return(new_nodes,new_edges)
This diff is collapsed.
......@@ -16,7 +16,7 @@ __status__ = "Production"
import sys
from pathlib import Path
from os import error
from os import error, path
sys.path.append("../")
......@@ -42,11 +42,11 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso
'''
# updates the graph if the json file already exists in the directory, otherwise starts a new graph construction
try:
with open(json_file) as f:
nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
json_file = Path(json_file)
except IOError:
if json_file.is_file():
nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
else:
nodes, edges = init_graph_construction(url_list, search_depth, search_height)
# exports graph to given json file name
......
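A usage sketch of the new file check (assumed call pattern, not part of the commit): the first call finds no 'test.json' and builds the graph from scratch, a later call with the same file name finds it via Path.is_file() and takes the update path:

Processing(doi_list, 3, 2, 'test.json')    # test.json absent: init_graph_construction
Processing(doi_list, 3, 2, 'test.json')    # test.json now present: update_graph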
This diff is collapsed.
import unittest
import sys
from pathlib import Path
sys.path.append("../")
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction, initialize_nodes_list_test, complete_inner_edges_test
from verarbeitung.construct_new_graph.add_citations_rec import get_cit_type_list
from verarbeitung.test.input_test import input_test_func
class ConstructionTest(unittest.TestCase):
maxDiff = None
def testCycle(self):
nodes, edges = init_graph_construction(['doiz1'],1,1,True)
nodes, edges = init_graph_construction(['doiz1'],1,1,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']])
nodes, edges = init_graph_construction(['doiz1'],2,2,True)
nodes, edges = init_graph_construction(['doiz1'],2,2,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2'])
self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']])
......@@ -28,61 +31,114 @@ class ConstructionTest(unittest.TestCase):
#def testEmptyDepth(self):
def testEmptyDepthHeight(self):
nodes, edges = init_graph_construction(['doi1'],0,0,True)
nodes, edges = init_graph_construction(['doi1'],0,0,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi1'])
self.assertCountEqual(edges, [])
nodes, edges = init_graph_construction(['doi1', 'doi2'],0,0,True)
nodes, edges = init_graph_construction(['doi1', 'doi2'],0,0,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2'])
self.assertCountEqual(edges, [['doi1', 'doi2']])
nodes, edges = init_graph_construction(['doi1', 'doi2', 'doi3'],0,0,True)
nodes, edges = init_graph_construction(['doi1', 'doi2', 'doi3'],0,0,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3'])
self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']])
def testInnerEdges(self):
nodes, edges = init_graph_construction(['doi_ie1'],1,1,True)
nodes, edges = init_graph_construction(['doi_ie1'],1,1,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3'])
self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']])
def testRightHeight(self):
nodes, edges = init_graph_construction(['doi_h01'],1,0,True)
nodes, edges = init_graph_construction(['doi_h01'],0,1,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h01'])
self.assertCountEqual(edges, [])
nodes, edges = init_graph_construction(['doi_h02'],1,0,True)
nodes, edges = init_graph_construction(['doi_h02'],0,1,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1'])
self.assertCountEqual(edges, [['doi_h1','doi_h02']])
nodes, edges = init_graph_construction(['doi_h02'],2,0,True)
nodes, edges = init_graph_construction(['doi_h02'],0,2,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2'])
self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']])
def testRightDepth(self):
nodes, edges = init_graph_construction(['doi_d01'],0,1,True)
nodes, edges = init_graph_construction(['doi_d01'],1,0,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d01'])
self.assertCountEqual(edges, [])
nodes, edges = init_graph_construction(['doi_d02'],0,1,True)
nodes, edges = init_graph_construction(['doi_d02'],1,0,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1'])
self.assertCountEqual(edges, [['doi_d02','doi_d1']])
nodes, edges = init_graph_construction(['doi_d02'],0,2,True)
nodes, edges = init_graph_construction(['doi_d02'],2,0,True,False)
doi_nodes = keep_only_dois(nodes)
self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2'])
self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']])
## From here on, the tests for the individual functions ##
# initialize_graph.py:
def test_initialize_nodes_list(self):
references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i','doi_lg_2_i'], 0, 0, True)
doi_references = keep_only_dois(references_pub_obj_list)
doi_citations = keep_only_dois(citations_pub_obj_list)
self.assertCountEqual(doi_references, [])
self.assertCountEqual(doi_citations, [])
references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i','doi_lg_2_i'], 1, 1, True)
doi_references = keep_only_dois(references_pub_obj_list)
doi_citations = keep_only_dois(citations_pub_obj_list)
self.assertCountEqual(doi_references, ['doi_lg_1_d11','doi_lg_1_d12','doi_lg_2_d11','doi_lg_2_d12'])
self.assertCountEqual(doi_citations, ['doi_lg_1_h11','doi_lg_1_h12','doi_cg_i','doi_lg_2_h11','doi_lg_2_h12'])
def test_complete_inner_edges(self):
pub_lg_1_i = input_test_func('doi_lg_1_i')
pub_lg_1_i.group = 0
pub_lg_1_h_12 = input_test_func('doi_lg_1_h12')
pub_lg_1_h_12.group = 1
pub_lg_1_d_12 = input_test_func('doi_lg_1_d12')
pub_lg_1_d_12.group = -1
nodes = [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12]
edges = [['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h12','doi_lg_1_i']]
processed_nodes, processed_edges = complete_inner_edges_test(nodes, edges)
self.assertCountEqual(processed_nodes, [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12])
self.assertCountEqual(processed_edges, [['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h12','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_d12']])
# add_citations_rec.py:
def test_get_type_list(self):
pub_lg_1_i = input_test_func('doi_lg_1_i')
pub_lg_1_i.group = 0
self.assertEqual(get_cit_type_list(pub_lg_1_i, "Hallo"), ValueError)
pub_lg_1_h_12 = input_test_func('doi_lg_1_h12')
pub_lg_1_h_12.group = 1
pub_lg_1_h_12_refs = get_cit_type_list(pub_lg_1_h_12, "Reference")
pub_lg_1_h_12_cits = get_cit_type_list(pub_lg_1_h_12, "Citation")
self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_refs), keep_only_dois(pub_lg_1_h_12.references))
self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_cits), keep_only_dois(pub_lg_1_h_12.citations))
pub_lg_1_d_12 = input_test_func('doi_lg_1_d12')
pub_lg_1_d_12.group = -1
pub_lg_1_d_12_refs = get_cit_type_list(pub_lg_1_d_12, "Reference")
pub_lg_1_d_12_cits = get_cit_type_list(pub_lg_1_d_12, "Citation")
self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_refs), keep_only_dois(pub_lg_1_d_12.references))
self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_cits), keep_only_dois(pub_lg_1_d_12.citations))
def test_create_graph_structure_citations(self):
print("Hallo")
def keep_only_dois(nodes):
......
......@@ -20,17 +20,17 @@ class UpdatingTest(unittest.TestCase):
# self.assertCountEqual(nodes_old,nodes_new)
# self.assertCountEqual(edges_old, edges_new)
# def test_deleted_input_dois(self):
# nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True)
# nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
# output_to_json(nodes_old_both, edges_old_both, test_var=True)
# nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True)
# self.assertCountEqual(nodes_old_single,nodes_new_single)
# self.assertCountEqual(edges_old_single, edges_new_single)
# nodes_old_single, edges_old_single = init_graph_construction(['doi_cg_i'],3,3,True)
# nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True)
# nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True)
def test_deleted_input_dois(self):
nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True)
nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
output_to_json(nodes_old_both, edges_old_both, test_var=True)
nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True)
self.assertCountEqual(nodes_old_single,nodes_new_single)
self.assertCountEqual(edges_old_single, edges_new_single)
nodes_old_single, edges_old_single = init_graph_construction(['doi_cg_i'],3,3,True)
nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True)
nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True)
def test_new_height(self):
nodes_height_0, edges_height_0 = init_graph_construction(['doi_lg_1_i'],2,0,True)
......
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_i", "name": "title_lg_2_i", "author": ["contributor_lg_2_i"], "year": "date_lg_2_i", "journal": "journal_lg_2_i", "group": "Input", "depth": 0, "citations": 4}, {"doi": "doi_lg_2_d11", "name": "title_lg_2_d11", "author": ["contributor_lg_2_d11"], "year": "date_lg_2_d11", "journal": "journal_lg_2_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_2_d12", "name": "title_lg_2_d12", "author": ["contributor_lg_2_d12"], "year": "date_lg_2_d12", "journal": "journal_lg_2_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_2_h11", "name": "title_lg_2_h11", "author": ["contributor_lg_2_h11"], "year": "date_lg_2_h11", "journal": "journal_lg_2_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_h12", "name": "title_lg_2_h12", "author": ["contributor_lg_2_h12"], "year": "date_lg_2_h12", "journal": "journal_lg_2_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_cg_i", "name": "title_cg_i", "author": ["contributor_cg_i"], "year": "date_cg_i", "journal": "journal_cg_i", "group": "Citedby", "depth": 1, "citations": 3}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h21", "name": "title_lg_2_h21", "author": ["contributor_lg_2_h21"], "year": "date_lg_2_h21", "journal": "journal_lg_2_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h22", "name": "title_lg_2_h22", "author": ["contributor_lg_2_h22"], "year": "date_lg_2_h22", "journal": "journal_lg_2_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h23", "name": "title_lg_2_h23", "author": ["contributor_lg_2_h23"], "year": "date_lg_2_h23", "journal": "journal_lg_2_h23", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_lg_2_h24", "name": "title_lg_2_h24", "author": ["contributor_lg_2_h24"], "year": "date_lg_2_h24", "journal": "journal_lg_2_h24", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_cg_h11", "name": "title_cg_h11", "author": ["contributor_cg_h11"], "year": "date_cg_h11", "journal": "journal_cg_h11", 
"group": "Citedby", "depth": 2, "citations": 2}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_2_d21", "name": "title_lg_2_d21", "author": ["contributor_lg_2_d21"], "year": "date_lg_2_d21", "journal": "journal_lg_2_d21", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d22", "name": "title_lg_2_d22", "author": ["contributor_lg_2_d22"], "year": "date_lg_2_d22", "journal": "journal_lg_2_d22", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d23", "name": "title_lg_2_d23", "author": ["contributor_lg_2_d23"], "year": "date_lg_2_d23", "journal": "journal_lg_2_d23", "group": "Reference", "depth": -2, "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d11"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d12"}, {"source": "doi_lg_2_h11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_h12", "target": "doi_lg_2_i"}, {"source": "doi_cg_i", "target": "doi_lg_2_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h21", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h22", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h23"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h24"}, {"source": "doi_lg_1_h23", "target": "doi_cg_i"}, {"source": "doi_cg_h11", "target": "doi_cg_i"}, {"source": "doi_lg_2_h11", "target": "doi_cg_i"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_2_d11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_d11", "target": "doi_lg_2_d21"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d22"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_d12"}]}
\ No newline at end of file
{"nodes": [{"doi": "doi_h02", "name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "group": "Input", "depth": 0, "citations": 1}, {"doi": "doi_h1", "name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "group": "Citedby", "depth": 1, "citations": 1}, {"doi": "doi_h2", "name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "group": "Citedby", "depth": 2, "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]}
\ No newline at end of file
......@@ -16,69 +16,138 @@ __status__ = "Production"
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from .import_from_json import input_from_json
from verarbeitung.construct_new_graph.initialize_graph import initialize_nodes_list, complete_inner_edges
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.export_to_json import output_to_json
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
def find_furthermost_citations(node, old_search_depth, cit_type):
'''
:param node: node which is known but not from input group
:type node: Publication
def connect_old_and_new_input(json_file, new_doi_list, search_depth, search_height, test_var = False):
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param cit_type: determines whether the function call is for a reference or citation
:type cit_type: String
function to find the furthermost citations/references of a given node within the same group
'''
citations_saved = get_cit_type_list(node, cit_type)
# the node's group and the old search depth/height determine how many times the loop needs to run
for depth in range(old_search_depth - abs(node.group)):
new_citations = []
for citation in citations_saved:
for cit_node in nodes:
if citation.doi_url == cit_node.doi_url:
for new_cit in get_cit_type_list(cit_node, cit_type):
for new_cit_node in nodes:
if new_cit.doi_url == new_cit_node.doi_url:
new_cit_node.group -= node.group
new_citations.append(new_cit_node)
citations_saved = new_citations
# returns the references/citations which need to be processed to complete the construction
return(citations_saved)
def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var):
'''
:param json_file: json file with old graph
:type json_file: json file
:param inserted_nodes: list of nodes which were inserted into the new input list
:type inserted_nodes: List[String]
:param new_doi_list: additional dois which has to be connected to the old graph
:type new_doi_list: list of strings
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param search_depth: depth to search for references
:type search_depth: int
:param old_search_height: height to search for citations from old construction call
:type old_search_height: int
:param search_height: height to search for citations
:type search_height: int
:param new_search_depth: depth to search for references from new construction call
:type new_search_depth: int
:param new_search_height: height to search for citations from new construction call
:type new_search_height: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
connects the old graph and the new input dois into one complete new graph
completes the references and citations for nodes which were already known but not part of the input group
'''
global nodes, edges
nodes = []
edges = []
#changed_group_node_citations = []
#changed_group_node_references = []
nodes, edges = input_from_json(json_file)
# saves which nodes were handled because they were known before
handled_inserted_nodes = []
for node in nodes:
complete_changed_group_nodes(new_doi_list, search_depth, search_height, test_var)
# moves known reference node to input and completes citations and references for this node
if (node.group < 0) and (node.doi_url in inserted_nodes):
node_citations = create_global_lists_cit(nodes, edges, node, 1, new_search_height, "Citation", test_var)
add_citations(nodes, edges, node_citations, 1, new_search_height, "Citation", test_var)
old_max_references = find_furthermost_citations(node, old_search_depth, "Reference")
add_citations(nodes, edges, old_max_references, old_search_depth, new_search_depth, "Reference", test_var)
node.group = 0
handled_inserted_nodes.append(node)
# initializes nodes/edges from the input and returns lists of publication objects for citations and references
references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list,search_depth, search_height, test_var)
# moves known citation node to input and completes citations and references for this node
elif (node.group > 0) and (node.doi_url in inserted_nodes):
node_references = create_global_lists_cit(nodes, edges, node, 1, new_search_depth, "Reference", test_var)
add_citations(nodes, edges, node_references, 1, new_search_depth, "Reference", test_var)
old_max_citations = find_furthermost_citations(node, old_search_height, "Citation")
add_citations(nodes, edges, old_max_citations, old_search_height, new_search_height, "Citation", test_var)
# function calls to begin recursive processing up to max depth/height
add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)
node.group = 0
handled_inserted_nodes.append(node)
# adds edges between reference group and citation group of known publications
complete_inner_edges(test_var)
return(handled_inserted_nodes)
# calls a script to save the nodes and edges of the graph in a .json file
output_to_json(nodes,edges, test_var)
return(nodes, edges)
def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var = False):
'''
:param input_nodes_list: list of nodes which are processed for new construction call
:type input_nodes_list: List[Publication]
:param input_edges_list: list of links between nodes from input_nodes_list.
:type input_edges_list: List[List[String,String]]
def complete_changed_group_nodes(new_doi_list, search_depth_max, search_height_max, test_var):
'''
work in progress
:param inserted_nodes: list of nodes which were inserted into the new input list
:type inserted_nodes: List[String]
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param old_search_height: height to search for citations from old construction call
:type old_search_height: int
:param new_search_depth: depth to search for references from new construction call
:type new_search_depth: int
:param new_search_height: height to search for citations from new construction call
:type new_search_height: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
completes the references and citations for nodes which were already known but not part of the input group
'''
changed_group_node_citations = []
changed_group_node_references = []
global nodes, edges
nodes = input_nodes_list
edges = input_edges_list
for node in nodes:
if (node.group < 0) and (node.doi in new_doi_list):
node.group = "input"
handled_inserted_nodes = complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var)
# keep only those nodes from inserted_nodes that are not in handled_inserted_nodes
not_handled_inserted_nodes = [node for node in inserted_nodes if node not in handled_inserted_nodes]
elif (node.group > 0) and (node.doi in new_doi_list):
node.group = "input"
# initializes nodes/edges from the input and returns lists of publication objects for citations and references
#references_obj_list, citations_obj_list = initialize_nodes_list(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var)
# function calls to begin recursive processing up to max depth/height
#add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var)
#add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var)
init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges)
......@@ -90,6 +90,7 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list):
search_ref_graph_rec(pub)
search_cit_graph_rec(pub)
valid_edges = back_to_valid_edges(old_edges_list, usable_nodes)
return(usable_nodes, valid_edges)
\ No newline at end of file
back_to_valid_edges(old_edges_list, usable_nodes)
input_list = usable_nodes.copy()
print(input_list)
#return(usable_nodes, valid_edges)
......@@ -72,7 +72,10 @@ def get_old_max_references(old_depth):
old_max_references = []
for pub in processed_input_list:
if (abs(pub.group) == old_depth):
old_max_references.append(pub.references)
for reference in pub.references:
for ref_pub in processed_input_list:
if reference.doi_url == ref_pub.doi_url:
old_max_references.append(ref_pub)
return(old_max_references)
def get_old_max_citations(old_height):
......@@ -85,7 +88,10 @@ def get_old_max_citations(old_height):
old_max_citations = []
for pub in processed_input_list:
if (abs(pub.group) == old_height):
old_max_citations.append(pub.citations)
for citation in pub.citations:
for cit_pub in processed_input_list:
if citation.doi_url == cit_pub.doi_url:
old_max_citations.append(cit_pub)
return(old_max_citations)
def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
......@@ -131,7 +137,8 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
back_to_valid_edges(valid_edges, processed_input_list)
# adds edges between reference group and citation group of known publications
complete_inner_edges()
return(old_depth, old_height)
......
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
......@@ -85,7 +85,6 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
# gets information from the previous construction call
old_obj_input_list , old_edges_list = input_from_json(json_file)
print(type(old_edges_list[1]))
# one global list to save the process of removing unneeded publications and one to save valid edges
global processed_list, valid_edges
......@@ -102,11 +101,11 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
# deletes publications and edges from node_list if publications can no longer be reached
if (len(deleted_nodes) > 0):
processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list)
delete_nodes_and_edges(processed_list, common_nodes, valid_edges)
update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
if (len(inserted_nodes) > 0):
connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var)
connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var)
return(processed_list, valid_edges)
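A combined usage sketch of the update flow (not part of the commit; it mirrors the commented-out update test above):

nodes_old, edges_old = init_graph_construction(['doi_lg_1_i', 'doi_lg_2_i'], 2, 2, True)
output_to_json(nodes_old, edges_old, test_var=True)                        # writes test_output.json
nodes_new, edges_new = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True)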