Skip to content
Snippets Groups Projects
Commit 2e5dfde9 authored by Katja's avatar Katja
Browse files

Merge remote-tracking branch 'upstream/main' into main

parents 059e8e0b 5701ab3d
Branches
No related tags found
1 merge request!10Output
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from .import_from_json import input_from_json
from verarbeitung.construct_new_graph.initialize_graph import initialize_nodes_list, complete_inner_edges
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.export_to_json import output_to_json
def connect_old_and_new_input(json_file, new_doi_list, search_depth, search_height, test_var = False):
    '''
    :param json_file: json file with old graph
    :type json_file: json file

    :param new_doi_list: additional dois which have to be connected to the old graph
    :type new_doi_list: list of strings

    :param search_depth: depth to search for references
    :type search_depth: int

    :param search_height: height to search for citations
    :type search_height: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    connects the old graph and the new input dois to a complete new graph
    '''
    global nodes, edges

    # restores the old graph state from the json file
    # (redundant "nodes = []; edges = []" assignments removed: both names were
    # immediately rebound by input_from_json, so they were dead code)
    nodes, edges = input_from_json(json_file)

    # moves publications already known from the old graph into the "input" group
    complete_changed_group_nodes(new_doi_list, search_depth, search_height, test_var)

    # initializes nodes/edges from input and gets a list with publication objects for citations and references returned
    references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list, search_depth, search_height, test_var)

    # function calls to begin recursive processing up to max depth/height
    add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
    add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges(test_var)

    # calls a script to save nodes and edges of graph in .json file
    output_to_json(nodes, edges, test_var)

    return(nodes, edges)
def complete_changed_group_nodes(new_doi_list, search_depth_max, search_height_max, test_var):
    '''
    :param new_doi_list: dois from the new input call to look for in the old graph
    :type new_doi_list: list of strings

    :param search_depth_max: maximum depth to search for references (currently unused)
    :type search_depth_max: int

    :param search_height_max: maximum height to search for citations (currently unused)
    :type search_height_max: int

    :param test_var: variable to differentiate between test and url call (currently unused)
    :type test_var: boolean

    work in progress: moves publications of the old graph whose doi reappears in
    the new input list into the "input" group
    '''
    # The original branched separately on node.group < 0 and node.group > 0, but
    # both branches performed the same assignment; merged into one condition.
    # Nodes with group == 0 stay untouched (they already are input nodes).
    # Two unused local lists (changed_group_node_citations/references) removed.
    for node in nodes:
        # NOTE(review): this compares node.doi while the rest of the project
        # uses doi_url — confirm Publication exposes a "doi" attribute here
        if (node.group != 0) and (node.doi in new_doi_list):
            node.group = "input"
# -*- coding: utf-8 -*-
"""
Functions to remove publications/links from nodes/edges list, if they can no longer be reached
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
sys.path.append("../../")
from .Kanten_Vergleich import back_to_valid_edges
def search_ref_graph_rec(pub):
    '''
    :param pub: publication whose references get appended to usable_nodes
    :type pub: Publication

    function that appends nodes of group "reference" to list usable_nodes, if they are reachable from input nodes
    '''
    for reference in pub.references:
        for ref_pub in input_obj_list:
            # skip entries that do not match this reference or are already kept
            if (reference.doi_url != ref_pub.doi_url) or (ref_pub in usable_nodes):
                continue
            usable_nodes.append(ref_pub)

            # recurse only when this doi is not also among pub's citations,
            # to find a cyclus and avoid recursion error
            no_cycle = all(citation.doi_url != reference.doi_url for citation in pub.citations)
            if no_cycle:
                search_ref_graph_rec(ref_pub)
def search_cit_graph_rec(pub):
    '''
    :param pub: publication whose citations get appended to usable_nodes
    :type pub: Publication

    function that appends nodes of group "citation" to list usable_nodes, if they are reachable from input nodes
    '''
    for citation in pub.citations:
        for cit_pub in input_obj_list:
            # skip entries that do not match this citation or are already kept
            if (citation.doi_url != cit_pub.doi_url) or (cit_pub in usable_nodes):
                continue
            usable_nodes.append(cit_pub)

            # recurse only when this doi is not also among pub's references,
            # to find a cyclus and avoid recursion error
            no_cycle = all(reference.doi_url != citation.doi_url for reference in pub.references)
            if no_cycle:
                search_cit_graph_rec(cit_pub)
def delete_nodes_and_edges(input_list, common_nodes, old_edges_list):
    '''
    :param input_list: list of publications to get reduced
    :type input_list: List[Publication]

    :param common_nodes: list of input dois which are in old and new input call
    :type common_nodes: List[String]

    :param old_edges_list: list of links between publications from old call
    :type old_edges_list: List[List[String,String]]

    function to start recursive node removal for references and citations and to change edge list to valid state
    '''
    global usable_nodes, input_obj_list
    usable_nodes = []
    input_obj_list = input_list

    # every input doi still present in the new call seeds a tree search;
    # all publications reached from a seed are kept in usable_nodes
    for seed_doi in common_nodes:
        for pub in input_obj_list:
            if seed_doi != pub.doi_url:
                continue
            usable_nodes.append(pub)
            search_ref_graph_rec(pub)
            search_cit_graph_rec(pub)

    # drop every edge whose endpoints are no longer both usable
    valid_edges = back_to_valid_edges(old_edges_list, usable_nodes)
    return (usable_nodes, valid_edges)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to read old json files to recreate old graph structure
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import json
import sys
sys.path.append("../")
from input.publication import Publication, Citation
def create_pubs_from_json(input_dict):
    '''
    :param input_dict: dictionary read from old graph Json File
    :type input_dict: dictionary

    creates list of publications retrieved from old json file
    '''
    # turn every stored node back into a Publication object and collect them
    for node in input_dict["nodes"]:
        publication = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], [])
        # the stored "depth" field holds the recursion group of the node
        publication.group = node["depth"]
        list_of_nodes_py.append(publication)
def add_ref_and_cit_to_pubs(input_dict):
    '''
    :param input_dict: dictionary read from old graph Json File
    :type input_dict: dictionary

    adds references and citations to retrieved publication list and fills the
    global edge list
    '''
    # map doi_url -> Publication once so each edge resolves in O(1); the
    # original scanned the whole node list twice per edge (O(E * N^2))
    pubs_by_doi = {pub.doi_url: pub for pub in list_of_nodes_py}

    for edge in input_dict["links"]:
        source = pubs_by_doi.get(edge["source"])
        target = pubs_by_doi.get(edge["target"])
        # edges pointing at unknown nodes were silently skipped by the
        # original nested loops as well
        if (source is None) or (target is None):
            continue

        # when correct dois found, adds them as references/citations to publication list
        new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference")
        source.references.append(new_reference)
        new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation")
        target.citations.append(new_citation)

        # adds edge to list
        list_of_edges_py.append([edge["source"],edge["target"]])
def input_from_json(json_file):
    '''
    :param json_file: Json-Datei for the old graph
    :type json_file: String

    retrieves information from old json file to be reused for new graph construction
    '''
    # global collections filled by the two helper functions below
    global list_of_nodes_py, list_of_edges_py
    list_of_nodes_py, list_of_edges_py = [], []

    # load the stored graph into a dictionary
    with open(json_file, 'r') as file:
        input_dict = json.load(file)

    # rebuild Publication objects first, then their references/citations/edges
    create_pubs_from_json(input_dict)
    add_ref_and_cit_to_pubs(input_dict)

    return (list_of_nodes_py, list_of_edges_py)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to update the citation depth of recursive graph construction
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
sys.path.append("../../")
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges
from .Kanten_Vergleich import back_to_valid_edges
def reduce_max_height(max_height):
    '''
    :param max_height: new maximum height to reduce publications in publication list to
    :type max_height: int

    function to remove all publications which are not in new maximum height threshold
    '''
    # iterate over a snapshot so removing from the global list is safe
    for pub in list(processed_input_list):
        # only citation nodes (positive group) above the new threshold go away
        if (pub.group > 0) and (pub.group > max_height):
            processed_input_list.remove(pub)
def reduce_max_depth(max_depth):
    '''
    :param max_depth: new maximum depth to reduce publications in publication list to
    :type max_depth: int

    function to remove all publications which are not in new maximum depth threshold
    '''
    # iterate over a snapshot so removing from the global list is safe
    for pub in list(processed_input_list):
        # only reference nodes (negative group) below the new threshold go away
        if (pub.group < 0) and (abs(pub.group) > max_depth):
            processed_input_list.remove(pub)
def get_old_height_depth():
    '''
    function to get old max height and max depth from previous construction call

    returns a tuple (max_height, max_depth) derived from the group values of
    the global processed_input_list (negative groups are depths, positive
    groups are heights)
    '''
    height, depth = 0, 0
    for pub in processed_input_list:
        group = pub.group
        if group > 0:
            height = max(height, group)
        elif group < 0:
            depth = max(depth, -group)
    return (height, depth)
def get_old_max_references(old_depth):
    '''
    :param old_depth: old maximum depth to search for references
    :type old_depth: int

    function to get references for new recursive levels
    '''
    old_max_references = []
    for pub in processed_input_list:
        # reference nodes carry a NEGATIVE group; the original abs() check also
        # matched citation nodes whose height equals old_depth, wrongly mixing
        # citations into the reference expansion
        if (pub.group == -old_depth):
            old_max_references.append(pub.references)
    return(old_max_references)
def get_old_max_citations(old_height):
    '''
    :param old_height: old maximum height to search for citations
    :type old_height: int

    function to get citations for new recursive levels
    '''
    old_max_citations = []
    for pub in processed_input_list:
        # citation nodes carry a POSITIVE group; the original abs() check also
        # matched reference nodes whose depth equals old_height, wrongly mixing
        # references into the citation expansion
        if (pub.group == old_height):
            old_max_citations.append(pub.citations)
    return(old_max_citations)
def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
    '''
    :param obj_input_list: input list of publications of type Publication from update_graph
    :type obj_input_list: List[Publication]

    :param input_edges: list of edges between publications from update_graph
    :type input_edges: List[List[String,String]]

    :param new_depth: new maximum depth to search for references
    :type new_depth: int

    :param new_height: new maximum height to search for citations
    :type new_height: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    function to adjust old publication search depth to update call
    '''
    global processed_input_list, valid_edges
    processed_input_list = obj_input_list
    valid_edges = input_edges

    old_height, old_depth = get_old_height_depth()

    # removes publications and links from recursion levels which aren't needed anymore
    if (old_depth > new_depth):
        reduce_max_depth(new_depth)
    elif (old_height > new_height):
        reduce_max_height(new_height)

    # adds publications and links for new recursion levels
    # bug fix: both getter functions require the old level as argument; they
    # were previously called with no arguments, which raised a TypeError
    elif (old_depth < new_depth):
        old_max_references = get_old_max_references(old_depth)
        add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var)
    elif (old_height < new_height):
        old_max_citations = get_old_max_citations(old_height)
        add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", test_var)

    back_to_valid_edges(valid_edges, processed_input_list)

    # adds edges between reference group and citation group of known publications
    # NOTE(review): the elif chain handles at most one of the four cases per
    # call (e.g. depth AND height both shrinking only reduces depth) — confirm
    # this is intended. Also confirm complete_inner_edges accepts zero
    # arguments; the construction module calls it with test_var.
    complete_inner_edges()
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .Knoten_Vergleich import doi_listen_vergleichen
from .delete_nodes_edges import delete_nodes_and_edges
from .connect_new_input import connect_old_and_new_input
from .update_depth import update_depth
from .import_from_json import input_from_json
def get_old_input_dois(old_obj_input_list):
    '''
    :param old_obj_input_list: list of publications retrieved from old json file
    :type old_obj_input_list: List[Publication]

    function to return pub dois for old publications of group input retrieved from json file
    '''
    # group 0 marks the publications that were the original input set
    return [pub.doi_url for pub in old_obj_input_list if pub.group == 0]
def get_new_input_dois(new_input, test_var):
    '''
    :param new_input: input list of doi from UI
    :type new_input: list of strings

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    function to return pub dois for input urls
    '''
    new_input_dois = []
    for input_url in new_input:
        # retrieve the publication for this url; on failure get_pub returns an
        # error description instead of a Publication object
        pub = get_pub(input_url, test_var)
        if (type(pub) != Publication):
            print(pub)
            continue
        new_input_dois.append(pub.doi_url)
    return (new_input_dois)
def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False):
    '''
    :param new_doi_input_list: input list of doi from UI
    :type new_doi_input_list: List[String]

    :param json_file: json file with the previously constructed graph
    :type json_file: String

    :param search_depth: new maximum depth to search for references
    :type search_depth: int

    :param search_height: new maximum height to search for citations
    :type search_height: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
    '''
    # gets information from previous construction call
    # (leftover debug print of old_edges_list[1] removed: it leaked to stdout
    # and raised IndexError for graphs with fewer than two edges)
    old_obj_input_list, old_edges_list = input_from_json(json_file)

    # one global list to save the process of removing unneeded publications and one to save valid edges
    global processed_list, valid_edges
    processed_list = old_obj_input_list
    valid_edges = old_edges_list

    # get dois from lists to compare for differences
    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)

    # retrieve which publications are already known, removed, inserted
    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    # deletes publications and edges from node_list if publications can no longer be reached
    if (len(deleted_nodes) > 0):
        processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list)

    # adjusts the graph to the (possibly changed) search depth/height
    update_depth(processed_list, valid_edges, search_depth, search_height, test_var)

    # connects newly added input dois to the existing graph
    if (len(inserted_nodes) > 0):
        connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var)

    return(processed_list, valid_edges)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment