Skip to content
Snippets Groups Projects
Commit cd1c5443 authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

added height/depth adjustment

parent 9a8790e8
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
Showing
with 245 additions and 33 deletions
......@@ -54,4 +54,8 @@ coverage.xml
docs/_build/
# PyBuilder
target/
\ No newline at end of file
target/
#CodeCounter
.VSCodeCounter/
\ No newline at end of file
......@@ -184,4 +184,4 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth
edges = input_edges
process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)
return(nodes, edges)
\ No newline at end of file
#return(nodes, edges)
\ No newline at end of file
......@@ -60,7 +60,7 @@ def format_edges(edges):
return list_of_edge_dicts
def output_to_json(nodes, edges, test_var):
def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False):
'''
:param nodes: list of publications to export to json
:type nodes: List[Publication]
......@@ -78,9 +78,9 @@ def output_to_json(nodes, edges, test_var):
list_of_edge_dicts = format_edges(edges)
dict_of_all["nodes"] = list_of_node_dicts
dict_of_all["links"] = list_of_edge_dicts
if (test_var):
if (test_var and json_file == 'json_text.json'):
with open('test_output.json','w') as outfile:
json.dump(dict_of_all, outfile)
else:
with open('json_text.json','w') as outfile:
with open(json_file,'w') as outfile:
json.dump(dict_of_all, outfile)
......@@ -142,6 +142,6 @@ def init_graph_construction(doi_input_list, search_height, search_depth, test_va
complete_inner_edges()
# calls a script to save nodes and edges of the graph in a .json file
output_to_json(nodes, edges, test_var)
#output_to_json(nodes, edges, test_var)
return(nodes,edges)
......@@ -20,7 +20,7 @@ import sys
sys.path.append("../../")
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.update_graph.import_from_json import input_from_json
from verarbeitung.update_graph.update_graph import check_graph_updates
from verarbeitung.update_graph.update_graph import update_graph
# a function to print nodes and edges from a graph
def print_graph(nodes, edges):
......@@ -93,7 +93,7 @@ def try_delete_nodes():
# list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
# doi_list = []
# doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
# valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
# valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py)
# print_simple(valid_nodes, valid_edges)
def try_import():
......
This diff is collapsed.
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": 
["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]}
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
main function to call to generate a graph representing citations between multiple ACS/Nature journals
"""
def Processing(url):
print(url)
\ No newline at end of file
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from verarbeitung.construct_new_graph.export_to_json import output_to_json
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.update_graph.update_graph import update_graph
def Processing(url_list, search_depth, search_height, json_file = 'json_text.json'):
    '''
        :param url_list:        list of urls to construct publication graph for
        :type url_list:         List[String]

        :param search_depth:    maximum depth to search for references
        :type search_depth:     int

        :param search_height:   maximum height to search for citations
        :type search_height:    int

        :param json_file:       file to export graph to
        :type json_file:        String

        main function to construct new or updated publication graphs.
        If json_file can be opened for reading, the graph stored in it is updated,
        otherwise a new graph is constructed. In both cases the result is exported
        to json_file.
    '''

    # only the readability probe is guarded: an IOError/OSError raised while
    # updating the graph must reach the caller instead of silently triggering
    # a complete rebuild
    try:
        with open(json_file):
            pass
    except IOError:
        json_file_known = False
    else:
        json_file_known = True

    if json_file_known:
        nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
    else:
        # init_graph_construction declares search_height before search_depth,
        # so both limits are passed by keyword to avoid swapping them
        nodes, edges = init_graph_construction(url_list, search_height = search_height, search_depth = search_depth)

    # exports graph to given json file name
    output_to_json(nodes, edges, json_file)
\ No newline at end of file
import sys
from pathlib import Path
from verarbeitung.process_main import Processing
from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes
try_delete_nodes()
\ No newline at end of file
doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
Processing(doi_list, 2, 2, 'test.json')
\ No newline at end of file
This diff is collapsed.
......@@ -2,17 +2,20 @@ import unittest
import sys
from pathlib import Path
sys.path.append("../")
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.export_to_json import output_to_json
from verarbeitung.update_graph.import_from_json import input_from_json
from verarbeitung.update_graph.update_graph import check_graph_updates
from verarbeitung.update_graph.update_graph import update_graph
class UpdatingTest(unittest.TestCase):
maxDiff = None
def test_import_from_json(self):
nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True)
output_to_json(nodes_old, edges_old, test_var = True)
nodes_new, edges_new = input_from_json('test_output.json')
self.assertCountEqual(nodes_old,nodes_new)
self.assertCountEqual(edges_old, edges_new)
......@@ -20,8 +23,8 @@ class UpdatingTest(unittest.TestCase):
def test_deleted_input_dois(self):
nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True)
nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
nodes_new_both, edges_new_both = input_from_json('test_output.json')
nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, 'test_output.json', 2, 2, True)
output_to_json(nodes_old_both, edges_old_both, test_var=True)
nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True)
self.assertCountEqual(nodes_old_single,nodes_new_single)
self.assertCountEqual(edges_old_single, edges_new_single)
......@@ -29,6 +32,29 @@ class UpdatingTest(unittest.TestCase):
nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True)
nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True)
def test_new_height(self):
nodes_height_0, edges_height_0 = init_graph_construction(['doi_lg_1_i'],2,0,True)
nodes_height_1, edges_height_1 = init_graph_construction(['doi_lg_1_i'],2,1,True)
nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True)
output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True)
nodes_new_height_1, edges_new_height_1 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True)
self.assertCountEqual(nodes_height_1, nodes_new_height_1)
self.assertCountEqual(edges_height_1, edges_new_height_1)
nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True)
output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True)
nodes_new_height_0, edges_new_height_0 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True)
self.assertCountEqual(nodes_height_0, nodes_new_height_0)
self.assertCountEqual(edges_height_0, edges_new_height_0)
def keep_only_dois(nodes):
'''
......@@ -36,7 +62,7 @@ def keep_only_dois(nodes):
:type nodes: List[Publication]
gets nodes of type pub and return only their doi
'''
'''
doi_list = []
for node in nodes:
doi_list.append(node.doi_url)
......
......@@ -78,10 +78,9 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list):
function to start recursive node removal for references and citations and to change edge list to valid state
'''
global usable_nodes, input_obj_list
usable_nodes = []
input_obj_list = input_list.copy()
input_obj_list = input_list
# starts for every common input node a tree-search and adds found nodes to usable_nodes
for common in common_nodes:
......
......@@ -34,7 +34,8 @@ def create_pubs_from_json(input_dict):
for node in input_dict["nodes"]:
#creates for the nodes the objects class Publication
pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["depth"] )
pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], [])
pub.group = node["depth"]
#appends the objects to a list
list_of_nodes_py.append(pub)
......
# -*- coding: utf-8 -*-
"""
Functions to update the citation depth of recursive graph construction
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
sys.path.append("../../")
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges
from .Kanten_Vergleich import back_to_valid_edges
def reduce_max_depth_height(max_depth_height):
    '''
        :param max_depth_height:    new maximum depth/height to reduce publications in publication list to
        :type max_depth_height:     int

        function to remove all publications which are not in new maximum depth/height threshold.
        The threshold is compared against the absolute value of pub.group, so it is applied
        to both negative and positive levels. The module-level list processed_input_list
        is changed in place.
    '''

    # slice assignment keeps the list object itself, so every other name bound to
    # it sees the reduced content; calling remove() while iterating the same list
    # would skip the element following each removed one
    processed_input_list[:] = [pub for pub in processed_input_list if abs(pub.group) <= max_depth_height]
def get_old_height_depth():
    '''
        function to read the largest height and the largest depth of the previous
        construction call from the group values in processed_input_list

        returns the tuple (max_height, max_depth); both are 0 if no publication
        has a positive or negative group respectively
    '''

    levels = [pub.group for pub in processed_input_list]

    # positive groups count as height, negative groups as depth
    heights = [level for level in levels if level > 0]
    depths = [abs(level) for level in levels if level < 0]

    max_height = max(heights, default = 0)
    max_depth = max(depths, default = 0)
    return(max_height, max_depth)
def get_old_max_references(old_depth):
    '''
        :param old_depth:       old maximum depth to search for references
        :type old_depth:        int

        function to get references for new recursive levels.
        Collects the references attribute of every publication in
        processed_input_list whose group equals -old_depth and returns them
        as a list with one entry per matching publication.
    '''

    old_max_references = []
    for pub in processed_input_list:
        # depth levels are stored as negative group values; comparing the absolute
        # value would also pick up publications on the same positive (height) level
        if (pub.group == -old_depth):
            old_max_references.append(pub.references)
    return(old_max_references)
def get_old_max_citations(old_height):
    '''
        :param old_height:      old maximum height to search for citations
        :type old_height:       int

        function to get citations for new recursive levels.
        Collects the citations attribute of every publication in
        processed_input_list whose group equals old_height and returns them
        as a list with one entry per matching publication.
    '''

    old_max_citations = []
    for pub in processed_input_list:
        # height levels are stored as positive group values; comparing the absolute
        # value would also pick up publications on the same negative (depth) level
        if (pub.group == old_height):
            old_max_citations.append(pub.citations)
    return(old_max_citations)
def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
    '''
        :param obj_input_list:  input list of publications of type Publication from update_graph
        :type obj_input_list:   List[Publication]

        :param input_edges:     list of edges from update_graph
        :type input_edges:      List

        :param new_depth:       new maximum depth to search for references
        :type new_depth:        int

        :param new_height:      new maximum height to search for citations
        :type new_height:       int

        :param test_var:        variable to differentiate between test and url call
        :type test_var:         boolean

        function to adjust old publication search depth and height to update call.
        Depth and height are handled independently of each other, so one call can
        shrink one side of the graph and extend the other.

        returns the tuple (processed_input_list, valid_edges)
    '''

    global processed_input_list, valid_edges

    processed_input_list = obj_input_list
    valid_edges = input_edges

    old_height, old_depth = get_old_height_depth()

    # removes publications and links from recursion levels which aren't needed anymore
    if (old_depth > new_depth or old_height > new_height):
        # references carry negative and citations positive group values, so each
        # limit only cuts its own side of the graph; the list is changed in place
        # because the caller keeps using the list object it passed in
        processed_input_list[:] = [pub for pub in processed_input_list if -new_depth <= pub.group <= new_height]
        # NOTE(review): if back_to_valid_edges returns a new list, the caller only
        # sees it through the return value of this function - confirm
        valid_edges = back_to_valid_edges(processed_input_list, valid_edges)

    # adds publications and links for new recursion levels
    if (old_depth < new_depth):
        old_max_references = get_old_max_references(old_depth)
        add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var)

    if (old_height < new_height):
        old_max_citations = get_old_max_citations(old_height)
        add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", test_var)

    # adds edges between reference group and citation group of known publications
    # NOTE(review): called without arguments; presumably works on the globals of
    # initialize_graph - confirm it sees the lists handled here
    complete_inner_edges()

    return(processed_input_list, valid_edges)
......@@ -14,16 +14,17 @@ __status__ = "Production"
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
import sys
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .Knoten_Vergleich import doi_listen_vergleichen
from .update_graph_del import delete_nodes_and_edges
from .delete_nodes_edges import delete_nodes_and_edges
from .connect_new_input import connect_old_and_new_input
from .update_depth import update_depth
from .import_from_json import input_from_json
def get_old_input_dois(old_obj_input_list):
......@@ -65,10 +66,10 @@ def get_new_input_dois(new_input, test_var):
return(new_input_dois)
def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, json_file, search_depth, search_height, test_var = False):
def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False):
'''
:param new_doi_input_list: input list of doi from UI
:type new_doi_input_list: list of strings
:type new_doi_input_list: List[String]
:param old_obj_input_list: list of publications retrieved from old json file
:type old_obj_input_list: List[Publication]
......@@ -82,14 +83,14 @@ def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list,
function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
'''
# gets information from previous construction call
old_obj_input_list , old_edges_list = input_from_json(json_file)
# one global list to save the process of removing unneeded publications and one to save valid edges
global processed_input_list, valid_edges
processed_input_list = old_obj_input_list.copy()
global processed_list, valid_edges
processed_list = old_obj_input_list
valid_edges = []
# save the return values of global lists
processed_input_list_del = []
valid_edges_del = []
# get dois from lists to compare for differences
old_doi_input_list = get_old_input_dois(old_obj_input_list)
......@@ -100,9 +101,11 @@ def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list,
# deletes publications and edges from node_list if publications can no longer be reached
if (len(deleted_nodes) > 0):
processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, common_nodes, old_edges_list)
processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list)
update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
if (len(inserted_nodes) > 0):
connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var)
return(processed_input_list_del, valid_edges_del)
return(processed_list, valid_edges)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment