Commit 4d08744f authored by Malte Schokolowski

added height/depth adjustment

parent beb8a4a4
Part of merge request !11 (merge verarbeitung to main repo)
Showing 245 additions and 33 deletions
@@ -54,4 +54,8 @@ coverage.xml
 docs/_build/
 
 # PyBuilder
-target/
\ No newline at end of file
+target/
+
+#CodeCounter
+.VSCodeCounter/
\ No newline at end of file
@@ -184,4 +184,4 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth
     edges = input_edges
 
     process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)
-    return(nodes, edges)
\ No newline at end of file
+    #return(nodes, edges)
\ No newline at end of file
@@ -60,7 +60,7 @@ def format_edges(edges):
     return list_of_edge_dicts
 
-def output_to_json(nodes, edges, test_var):
+def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False):
     '''
     :param nodes: list of publications to export to json
     :type nodes: List[Publication]
@@ -78,9 +78,9 @@ def output_to_json(nodes, edges, test_var):
     list_of_edge_dicts = format_edges(edges)
 
     dict_of_all["nodes"] = list_of_node_dicts
     dict_of_all["links"] = list_of_edge_dicts
-    if (test_var):
+    if (test_var and json_file == 'json_text.json'):
         with open('test_output.json','w') as outfile:
             json.dump(dict_of_all, outfile)
     else:
-        with open('json_text.json','w') as outfile:
+        with open(json_file,'w') as outfile:
             json.dump(dict_of_all, outfile)
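
For orientation, a minimal usage sketch of the reworked export (nodes and edges as returned by init_graph_construction; 'my_graph.json' is an arbitrary example name, not part of the commit):

    nodes, edges = init_graph_construction(doi_list, 2, 2, False)
    output_to_json(nodes, edges, json_file='my_graph.json')  # writes the graph to my_graph.json
    output_to_json(nodes, edges, test_var=True)              # test call with default file: writes test_output.json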
@@ -142,6 +142,6 @@ def init_graph_construction(doi_input_list, search_height, search_depth, test_var
     complete_inner_edges()
 
     # calls a script to save nodes and edges of graph in .json file
-    output_to_json(nodes, edges, test_var)
+    #output_to_json(nodes, edges, test_var)
 
     return(nodes,edges)
@@ -20,7 +20,7 @@ import sys
 sys.path.append("../../")
 
 from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
 from verarbeitung.update_graph.import_from_json import input_from_json
-from verarbeitung.update_graph.update_graph import check_graph_updates
+from verarbeitung.update_graph.update_graph import update_graph
 
 # a function to print nodes and edges from a graph
 def print_graph(nodes, edges):
@@ -93,7 +93,7 @@ def try_delete_nodes():
 #    list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json')
 #    doi_list = []
 #    doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
-#    valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py)
+#    valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py)
 #    print_simple(valid_nodes, valid_edges)
 
 def try_import():
This diff is collapsed.
{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]}
\ No newline at end of file
 # -*- coding: utf-8 -*-
 """
 main function to call to generate a graph representing citations between multiple ACS/Nature journals
 """
-def Processing(url):
-    print(url)
\ No newline at end of file
+
+__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
+__email__ = "cis-project2021@zbh.uni-hamburg.de"
+__status__ = "Production"
+#__copyright__ = ""
+#__credits__ = ["", "", "", ""]
+#__license__ = ""
+#__version__ = ""
+#__maintainer__ = ""
+
+import sys
+from pathlib import Path
+from os import error
+sys.path.append("../")
+
+from verarbeitung.construct_new_graph.export_to_json import output_to_json
+from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
+from verarbeitung.update_graph.update_graph import update_graph
+
+
+def Processing(url_list, search_depth, search_height, json_file = 'json_text.json'):
+    '''
+    :param url_list: list of urls to construct publication graph for
+    :type url_list: List[String]
+
+    :param search_depth: maximum depth to search for references
+    :type search_depth: int
+
+    :param search_height: maximum height to search for citations
+    :type search_height: int
+
+    :param json_file: file to export graph to
+    :type json_file: String
+
+    main function to construct new or updated publication graphs
+    '''
+    # updates graph if json file is known in directory otherwise starts new graph construction
+    try:
+        with open(json_file) as f:
+            nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
+    except IOError:
+        nodes, edges = init_graph_construction(url_list, search_depth, search_height)
+
+    # exports graph to given json file name
+    output_to_json(nodes, edges, json_file)
\ No newline at end of file
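
Sketch of the two paths through the new Processing entry point ('graph.json' is an illustrative name):

    Processing(doi_list, 2, 2, 'graph.json')  # graph.json absent: IOError path, builds a new graph via init_graph_construction
    Processing(doi_list, 2, 1, 'graph.json')  # graph.json present: update_graph adjusts the stored graph, here to height 1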
 import sys
 from pathlib import Path
 
+from verarbeitung.process_main import Processing
 from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes
 
-try_delete_nodes()
\ No newline at end of file
+doi_list = []
+doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
+#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
+doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
+Processing(doi_list, 2, 2, 'test.json')
\ No newline at end of file
This diff is collapsed.
@@ -2,17 +2,20 @@ import unittest
 import sys
 from pathlib import Path
 sys.path.append("../")
 
 from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
+from verarbeitung.construct_new_graph.export_to_json import output_to_json
 from verarbeitung.update_graph.import_from_json import input_from_json
-from verarbeitung.update_graph.update_graph import check_graph_updates
+from verarbeitung.update_graph.update_graph import update_graph
 
 class UpdatingTest(unittest.TestCase):
     maxDiff = None
 
     def test_import_from_json(self):
         nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True)
+        output_to_json(nodes_old, edges_old, test_var = True)
         nodes_new, edges_new = input_from_json('test_output.json')
         self.assertCountEqual(nodes_old,nodes_new)
         self.assertCountEqual(edges_old, edges_new)
@@ -20,8 +23,8 @@ class UpdatingTest(unittest.TestCase):
     def test_deleted_input_dois(self):
         nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True)
         nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True)
-        nodes_new_both, edges_new_both = input_from_json('test_output.json')
-        nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, 'test_output.json', 2, 2, True)
+        output_to_json(nodes_old_both, edges_old_both, test_var=True)
+        nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True)
         self.assertCountEqual(nodes_old_single,nodes_new_single)
         self.assertCountEqual(edges_old_single, edges_new_single)
@@ -29,6 +32,29 @@ class UpdatingTest(unittest.TestCase):
         nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True)
         nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True)
 
+    def test_new_height(self):
+        nodes_height_0, edges_height_0 = init_graph_construction(['doi_lg_1_i'],2,0,True)
+        nodes_height_1, edges_height_1 = init_graph_construction(['doi_lg_1_i'],2,1,True)
+        nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True)
+
+        output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True)
+        nodes_new_height_1, edges_new_height_1 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True)
+        self.assertCountEqual(nodes_height_1, nodes_new_height_1)
+        self.assertCountEqual(edges_height_1, edges_new_height_1)
+
+        nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True)
+        output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True)
+        nodes_new_height_0, edges_new_height_0 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True)
+        self.assertCountEqual(nodes_height_0, nodes_new_height_0)
+        self.assertCountEqual(edges_height_0, edges_new_height_0)
+
 def keep_only_dois(nodes):
     '''
@@ -36,7 +62,7 @@ def keep_only_dois(nodes):
     :type nodes: List[Publication]
 
     gets nodes of type pub and returns only their doi
-    '''
+    '''
     doi_list = []
     for node in nodes:
         doi_list.append(node.doi_url)
@@ -78,10 +78,9 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list):
     function to start recursive node removal for references and citations and to change edge list to valid state
     '''
     global usable_nodes, input_obj_list
     usable_nodes = []
-    input_obj_list = input_list.copy()
+    input_obj_list = input_list
 
     # starts a tree search for every common input node and adds found nodes to usable_nodes
     for common in common_nodes:
@@ -34,7 +34,8 @@ def create_pubs_from_json(input_dict):
     for node in input_dict["nodes"]:
 
         # creates a Publication object for each node
-        pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["depth"] )
+        pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], [])
+        pub.group = node["depth"]
 
         # appends the objects to a list
         list_of_nodes_py.append(pub)
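
Together with the new export signature, this change should let each publication's depth/height level survive a JSON round trip; a minimal sketch, assuming nodes and edges come from a previous construction call and 'graph.json' is a scratch file:

    output_to_json(nodes, edges, 'graph.json')
    nodes_back, edges_back = input_from_json('graph.json')
    # the level of each publication is restored via its group attribute
    assert sorted(p.group for p in nodes_back) == sorted(p.group for p in nodes)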
......
# -*- coding: utf-8 -*-
"""
Functions to update the citation depth of recursive graph construction
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
sys.path.append("../../")
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges
from .Kanten_Vergleich import back_to_valid_edges
def reduce_max_depth_height(max_depth_height):
    '''
    :param max_depth_height: new maximum depth/height to reduce publications in publication list to
    :type max_depth_height: int

    function to remove all publications which are not within the new maximum depth/height threshold
    '''
    # iterate over a copy, since removing from a list while iterating over it skips elements
    for pub in processed_input_list[:]:
        if (abs(pub.group) > max_depth_height):
            processed_input_list.remove(pub)
def get_old_height_depth():
    '''
    function to get old max height and max depth from previous construction call
    '''
    max_height = 0
    max_depth = 0
    for pub in processed_input_list:
        if (pub.group < 0):
            max_depth = max(max_depth, abs(pub.group))
        if (pub.group > 0):
            max_height = max(max_height, pub.group)
    return(max_height, max_depth)
def get_old_max_references(old_depth):
    '''
    :param old_depth: old maximum depth to search for references
    :type old_depth: int

    function to get references for new recursive levels
    '''
    old_max_references = []
    for pub in processed_input_list:
        if (pub.group == -old_depth):  # references carry negative group values
            old_max_references.append(pub.references)
    return(old_max_references)
def get_old_max_citations(old_height):
    '''
    :param old_height: old maximum height to search for citations
    :type old_height: int

    function to get citations for new recursive levels
    '''
    old_max_citations = []
    for pub in processed_input_list:
        if (pub.group == old_height):  # citations carry positive group values
            old_max_citations.append(pub.citations)
    return(old_max_citations)
def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
    '''
    :param obj_input_list: input list of publications of type Publication from update_graph
    :type obj_input_list: List[Publication]

    :param input_edges: list of edges from update_graph
    :type input_edges: List[List[String]]

    :param new_depth: new maximum depth to search for references
    :type new_depth: int

    :param new_height: new maximum height to search for citations
    :type new_height: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    function to adjust the old publication search depth/height to the update call
    '''
    global processed_input_list, valid_edges
    processed_input_list = obj_input_list
    valid_edges = input_edges

    old_height, old_depth = get_old_height_depth()

    # removes publications and links from recursion levels which aren't needed anymore
    if (old_depth > new_depth):
        reduce_max_depth_height(new_depth)
        valid_edges = back_to_valid_edges(processed_input_list, input_edges)
    elif (old_height > new_height):
        reduce_max_depth_height(new_height)
        valid_edges = back_to_valid_edges(processed_input_list, valid_edges)

    # adds publications and links for new recursion levels
    elif (old_depth < new_depth):
        old_max_references = get_old_max_references(old_depth)
        add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var)
    elif (old_height < new_height):
        old_max_citations = get_old_max_citations(old_height)
        add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", test_var)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges()
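
A minimal sketch of how update_depth is driven, using the stub DOIs from the unit tests (test_var=True):

    nodes, edges = init_graph_construction(['doi_lg_1_i'], 2, 2, True)
    update_depth(nodes, edges, 1, 2, True)  # old depth 2 > new depth 1: prunes publications and repairs the edge list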
@@ -14,16 +14,17 @@ __status__ = "Production"
 #__maintainer__ = ""
 
 import sys
 from pathlib import Path
 from os import error
 sys.path.append("../../")
 
 from input.publication import Publication
 from verarbeitung.get_pub_from_input import get_pub
 from .Knoten_Vergleich import doi_listen_vergleichen
-from .update_graph_del import delete_nodes_and_edges
+from .delete_nodes_edges import delete_nodes_and_edges
 from .connect_new_input import connect_old_and_new_input
+from .update_depth import update_depth
+from .import_from_json import input_from_json
 
 
 def get_old_input_dois(old_obj_input_list):
@@ -65,10 +66,10 @@ def get_new_input_dois(new_input, test_var):
     return(new_input_dois)
 
-def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, json_file, search_depth, search_height, test_var = False):
+def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False):
     '''
     :param new_doi_input_list: input list of doi from UI
-    :type new_doi_input_list: list of strings
+    :type new_doi_input_list: List[String]
 
     :param old_obj_input_list: list of publications retrieved from old json file
     :type old_obj_input_list: List[Publication]
@@ -82,14 +83,14 @@ def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list,
     function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
     '''
+    # gets information from previous construction call
+    old_obj_input_list, old_edges_list = input_from_json(json_file)
 
     # one global list to save the process of removing unneeded publications and one to save valid edges
-    global processed_input_list, valid_edges
-    processed_input_list = old_obj_input_list.copy()
+    global processed_list, valid_edges
+    processed_list = old_obj_input_list
     valid_edges = []
-    # save the return values of global lists
-    processed_input_list_del = []
-    valid_edges_del = []
 
     # get dois from lists to compare for differences
     old_doi_input_list = get_old_input_dois(old_obj_input_list)
@@ -100,9 +101,11 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False):
     # deletes publications and edges from node_list if publications can no longer be reached
     if (len(deleted_nodes) > 0):
-        processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, common_nodes, old_edges_list)
+        processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list)
+
+    update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
 
     if (len(inserted_nodes) > 0):
         connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var)
 
-    return(processed_input_list_del, valid_edges_del)
+    return(processed_list, valid_edges)
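
A minimal sketch of the new call pattern, mirroring the unit tests (the graph must first have been exported, here to test_output.json):

    output_to_json(nodes, edges, test_var=True)  # writes test_output.json
    new_nodes, new_edges = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True)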