Skip to content
Snippets Groups Projects
Commit 7ed27902 authored by Ehlers, Katja's avatar Ehlers, Katja
Browse files

Merge branch 'main' into 'output'

please

See merge request bax9187/projekt-cis-biochemie-2021-22!6
parents 85ece1d0 e4fd1022
No related branches found
No related tags found
No related merge requests found
# -*- coding: utf-8 -*-
"""
Functions to read old json files to recreate old graph structure
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import json
import sys
sys.path.append("../")
from input.publication import Publication, Citation
def create_pubs_from_json(input_dict):
    '''
    :param input_dict: dictionary read from old graph JSON file
    :type input_dict: dict

    creates list of publications retrieved from old JSON file
    '''
    # iterates over the list of nodes
    for node in input_dict["nodes"]:
        # creates a Publication object for each node
        pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], [])
        # "depth" holds the recursion level the node had in the old graph
        pub.group = node["depth"]
        # appends the object to the global node list
        list_of_nodes_py.append(pub)
def add_ref_and_cit_to_pubs(input_dict):
    '''
    :param input_dict: dictionary read from old graph JSON file
    :type input_dict: dict

    adds references and citations to retrieved publication list
    '''
    # maps doi -> publication once; avoids rescanning the whole node list
    # for every edge (the original double loop was O(n^2) per edge)
    pubs_by_doi = {pub.doi_url: pub for pub in list_of_nodes_py}

    # iterates over the list of edges
    for edge in input_dict["links"]:
        source = pubs_by_doi.get(edge["source"])
        target = pubs_by_doi.get(edge["target"])
        # when both dois are known, adds them as references/citations to the publications
        if (source is not None) and (target is not None):
            new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference")
            source.references.append(new_reference)
            new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation")
            target.citations.append(new_citation)
            # adds edge to the global edge list
            list_of_edges_py.append([edge["source"], edge["target"]])
def input_from_json(json_file):
    '''
    :param json_file: JSON file of the old graph
    :type json_file: String

    retrieves information from old JSON file to be reused for new graph construction
    '''
    # global collections shared with the helper functions above
    global list_of_nodes_py, list_of_edges_py
    list_of_nodes_py, list_of_edges_py = [], []

    # reads the JSON file content into a dictionary
    with open(json_file, 'r') as file:
        input_dict = json.load(file)

    # rebuilds Publication nodes from the stored node list
    create_pubs_from_json(input_dict)
    # restores references/citations and rebuilds the edge list
    add_ref_and_cit_to_pubs(input_dict)

    return (list_of_nodes_py, list_of_edges_py)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to update the citation depth of recursive graph construction
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
sys.path.append("../../")
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges
from .Kanten_Vergleich import back_to_valid_edges
def reduce_max_height(max_height):
    '''
    :param max_height: new maximum height to reduce publications in publication list to
    :type max_height: int

    function to remove all publications which are not in new maximum height threshold
    '''
    # iterate over a snapshot so removing from the global list is safe
    for pub in processed_input_list.copy():
        # positive group values are height (citation) levels
        if pub.group > 0 and pub.group > max_height:
            processed_input_list.remove(pub)
def reduce_max_depth(max_depth):
    '''
    :param max_depth: new maximum depth to reduce publications in publication list to
    :type max_depth: int

    function to remove all publications which are not in new maximum depth threshold
    '''
    # iterate over a snapshot so removing from the global list is safe
    for pub in processed_input_list.copy():
        # negative group values are depth (reference) levels
        if pub.group < 0 and abs(pub.group) > max_depth:
            processed_input_list.remove(pub)
def get_old_height_depth():
    '''
    function to get old max height and max depth from previous construction call
    '''
    max_height, max_depth = 0, 0
    for pub in processed_input_list:
        if pub.group > 0:
            # positive groups are citation (height) levels
            max_height = max(max_height, pub.group)
        elif pub.group < 0:
            # negative groups are reference (depth) levels
            max_depth = max(max_depth, -pub.group)
    return (max_height, max_depth)
def get_old_max_references(old_depth):
    '''
    :param old_depth: old maximum depth to search for references
    :type old_depth: int

    function to get references for new recursive levels
    '''
    old_max_references = []
    for pub in processed_input_list:
        # reference levels carry NEGATIVE group values; comparing with abs()
        # would wrongly match citation (height) publications on the same level
        if pub.group == -old_depth:
            old_max_references.append(pub.references)
    return (old_max_references)
def get_old_max_citations(old_height):
    '''
    :param old_height: old maximum height to search for citations
    :type old_height: int

    function to get citations for new recursive levels
    '''
    old_max_citations = []
    for pub in processed_input_list:
        # citation levels carry POSITIVE group values; comparing with abs()
        # would wrongly match reference (depth) publications on the same level
        if pub.group == old_height:
            old_max_citations.append(pub.citations)
    return (old_max_citations)
def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
    '''
    :param obj_input_list: input list of publications of type Publication from update_graph
    :type obj_input_list: List[Publication]

    :param input_edges: list of edges between publications from update_graph
    :type input_edges: List[List[String, String]]

    :param new_depth: new maximum depth to search for references
    :type new_depth: int

    :param new_height: new maximum height to search for citations
    :type new_height: int

    :param test_var: variable to differenciate between test and url call
    :type test_var: boolean

    function to adjust old publication search depth/height to update call
    '''
    global processed_input_list, valid_edges
    processed_input_list = obj_input_list
    valid_edges = input_edges

    old_height, old_depth = get_old_height_depth()

    # depth and height can change independently, so each is handled in its own
    # branch (a single elif chain would skip the height adjustment whenever the
    # depth changed as well)
    if (old_depth > new_depth):
        # removes publications and links from depth levels which aren't needed anymore
        reduce_max_depth(new_depth)
    elif (old_depth < new_depth):
        # adds publications and links for new depth levels; get_old_max_references
        # requires the old depth as argument (was called without it -> TypeError)
        old_max_references = get_old_max_references(old_depth)
        add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var)

    if (old_height > new_height):
        # removes publications and links from height levels which aren't needed anymore
        reduce_max_height(new_height)
    elif (old_height < new_height):
        # adds publications and links for new height levels; get_old_max_citations
        # requires the old height as argument (was called without it -> TypeError)
        old_max_citations = get_old_max_citations(old_height)
        add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", test_var)

    back_to_valid_edges(valid_edges, processed_input_list)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges()
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .Knoten_Vergleich import doi_listen_vergleichen
from .delete_nodes_edges import delete_nodes_and_edges
from .connect_new_input import connect_old_and_new_input
from .update_depth import update_depth
from .import_from_json import input_from_json
def get_old_input_dois(old_obj_input_list):
    '''
    :param old_obj_input_list: list of publications retrieved from old json file
    :type old_obj_input_list: List[Publication]

    function to return pub dois for old publications of group input retrieved from json file
    '''
    # group 0 marks publications that were part of the original user input
    return [pub.doi_url for pub in old_obj_input_list if pub.group == 0]
def get_new_input_dois(new_input, test_var):
    '''
    :param new_input: input list of doi from UI
    :type new_input: List[String]

    :param test_var: variable to differenciate between test and url call
    :type test_var: boolean

    function to return pub dois for input urls
    '''
    # new list to save doi_url for each new input url
    new_input_dois = []
    for new_node in new_input:
        # retrieves information and adds to new list if successful
        pub = get_pub(new_node, test_var)
        # isinstance instead of an exact type comparison also accepts
        # Publication subclasses returned by get_pub
        if not isinstance(pub, Publication):
            # best-effort: report the failed node and continue with the rest
            print(pub)
            continue
        new_input_dois.append(pub.doi_url)
    return (new_input_dois)
def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False):
    '''
    :param new_doi_input_list: input list of doi from UI
    :type new_doi_input_list: List[String]

    :param json_file: JSON file of the previous graph construction
    :type json_file: String

    :param search_depth: maximum depth to search for references
    :type search_depth: int

    :param search_height: maximum height to search for citations
    :type search_height: int

    :param test_var: variable to differenciate between test and url call
    :type test_var: boolean

    function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
    '''
    # gets information from previous construction call
    old_obj_input_list, old_edges_list = input_from_json(json_file)

    # one global list to save the process of removing unneeded publications and one to save valid edges
    global processed_list, valid_edges
    processed_list = old_obj_input_list
    valid_edges = old_edges_list

    # get dois from lists to compare for differences
    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)

    # retrieve which publications are already known, removed, inserted
    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    # deletes publications and edges from node_list if publications can no longer be reached
    if (len(deleted_nodes) > 0):
        processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list)

    # adjusts search depth/height of the remaining publications
    update_depth(processed_list, valid_edges, search_depth, search_height, test_var)

    # adds publications and edges for newly inserted input dois
    if (len(inserted_nodes) > 0):
        connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var)

    return (processed_list, valid_edges)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment