Select Git revision
connect_new_input.py
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
connect_new_input.py 12.21 KiB
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
# __copyright__ = ""
# __credits__ = ["", "", "", ""]
# __license__ = ""
# __version__ = ""
# __maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type):
    '''
        :param test_nodes:          node list to install as the module-level ``nodes``
        :param test_edges:          edge list to install as the module-level ``edges``
        :param changed_node:        node from which the search starts
        :param old_search_depth:    depth/height used by the old construction call
        :param new_search_depth:    depth/height used by the new construction call
        :param cit_type:            "Citation" or "Reference"

        test entry point for find_furthermost_citations: binds the module globals to the
        given lists and returns the result of the search.
    '''
    global nodes, edges
    nodes, edges = test_nodes, test_edges

    # the global lists themselves are handed over as result lists, so nodes and
    # edges found by the search are appended to test_nodes / test_edges in place
    furthermost = find_furthermost_citations(nodes, edges, changed_node, old_search_depth, new_search_depth,
                                             cit_type)
    return furthermost
def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height,
                                      new_search_depth, new_search_height):
    '''
        :param test_nodes:              node list to install as the module-level ``nodes``
        :param test_edges:              edge list to install as the module-level ``edges``
        :param inserted_test_nodes:     DOIs which are inserted to the new input
        :param old_search_depth:        depth to search for references from old construction call
        :param old_search_height:       height to search for citations from old construction call
        :param new_search_depth:        depth to search for references from new construction call
        :param new_search_height:       height to search for citations from new construction call

        test entry point for complete_changed_group_nodes: binds the module globals to the
        given lists and runs the function with test_var=True.
        Note that the result order differs from the wrapped function:
        it returns (new_nodes, new_edges, handled_nodes).
    '''
    global nodes, edges
    nodes, edges = test_nodes, test_edges

    result = complete_changed_group_nodes(inserted_test_nodes, old_search_depth, old_search_height,
                                          new_search_depth, new_search_height, True)
    handled_nodes, new_nodes, new_edges = result
    return (new_nodes, new_edges, handled_nodes)
def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new_search_depth, cit_type):
    '''
        :param new_nodes:           list of nodes which are generated separately from main node list to avoid recursive problems
        :type new_nodes:            List[Publication]

        :param new_edges:           list of edges which are generated separately from main edge list to avoid recursive problems
        :type new_edges:            List[List[String,String]]

        :param node:                node which is known but not from input group
        :type node:                 Publication

        :param old_search_depth:    depth/height to search for references/citations from old construction call
        :type old_search_depth:     int

        :param new_search_depth:    depth/height to search for references/citations from new construction call
        :type new_search_depth:     int

        :param cit_type:            determines whether the function call is for a reference or citation
        :type cit_type:             String

        function to find the furthermost citations/references of the given node which are already
        part of the module-level ``nodes`` list. All nodes found on the way are added to new_nodes,
        the corresponding links are added to new_edges and the group of each found node is updated.

        :return:                    nodes reached in the last iteration ([node] itself if no
                                    iteration is performed)
    '''
    citations_saved = [node]

    # group of node and old search depth/height determines how often the loop needs to be repeated
    for depth in range(min(old_search_depth - abs(node.group), new_search_depth)):
        new_citations = []
        for citation in citations_saved:
            for cit_node in nodes:
                if citation.doi_url != cit_node.doi_url:
                    continue

                for new_cit in get_cit_type_list(cit_node, cit_type):
                    for new_cit_node in nodes:
                        if new_cit.doi_url != new_cit_node.doi_url:
                            continue

                        # edges are stored as [citing doi, cited doi]
                        if cit_type == "Citation":
                            back_links = new_cit_node.citations
                            edge = [new_cit_node.doi_url, cit_node.doi_url]
                        elif cit_type == "Reference":
                            back_links = new_cit_node.references
                            edge = [cit_node.doi_url, new_cit_node.doi_url]
                        else:
                            continue

                        # to find a cycle and not change height/depth: skip the node if it
                        # links back to the node it was reached from
                        if any(cit_node.doi_url == back_link.doi_url for back_link in back_links):
                            continue

                        new_citations.append(new_cit_node)

                        # change height/depth accordingly and add link to edge
                        # NOTE(review): reference nodes carry negative groups elsewhere in this
                        # file, so adding depth moves them towards 0 - confirm this is intended.
                        new_cit_node.group = node.group + depth
                        if edge not in new_edges:
                            new_edges.append(edge)

        citations_saved = new_citations
        for new_citation in new_citations:
            if new_citation not in new_nodes:
                new_nodes.append(new_citation)

    # returns the references/citations which needs to be processed to complete construction
    return citations_saved
def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth,
                                 new_search_height, test_var):
    '''
        :param inserted_nodes:      list of nodes which are inserted to new input array
        :type inserted_nodes:       List[String]

        :param old_search_depth:    depth to search for references from old construction call
        :type old_search_depth:     int

        :param old_search_height:   height to search for citations from old construction call
        :type old_search_height:    int

        :param new_search_depth:    depth to search for references from new construction call
        :type new_search_depth:     int

        :param new_search_height:   height to search for citations from new construction call
        :type new_search_height:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        completes the references and citations for nodes of the module-level ``nodes`` list which
        were known in a non input group and are now part of the input.
        DOIs for which get_pub does not deliver a Publication are appended to the module-level
        ``error_doi_list`` and skipped.

        :return:                    (handled nodes from ``nodes``, new node list, new edge list)
    '''
    handled_inserted_nodes = []
    new_nodes = []
    new_edges = []

    for known_node in nodes:
        # decide in which direction the old tree has to be extended (primary) and
        # in which direction a completely new tree has to be built (secondary)
        if (known_node.group < 0) and (known_node.doi_url in inserted_nodes):
            # known reference node moves to input
            primary_type, old_limit, new_limit = "Reference", old_search_depth, new_search_depth
            secondary_type, secondary_limit = "Citation", new_search_height
        elif (known_node.group > 0) and (known_node.doi_url in inserted_nodes):
            # known citation node moves to input
            primary_type, old_limit, new_limit = "Citation", old_search_height, new_search_height
            secondary_type, secondary_limit = "Reference", new_search_depth
        else:
            continue

        # get pub from input
        pub = get_pub(known_node.doi_url, test_var)
        if type(pub) != Publication:
            error_doi_list.append(known_node.doi_url)
            continue

        # find old maximum publications and complete tree to new max depth/height
        pub.group = known_node.group
        old_maximum = find_furthermost_citations(new_nodes, new_edges, pub, old_limit, new_limit, primary_type)
        already_covered = min(old_limit - abs(known_node.group), new_limit)
        add_citations(new_nodes, new_edges, old_maximum, already_covered, new_limit, primary_type, test_var)

        # add tree for the opposite direction, starting at the publication itself
        add_citations(new_nodes, new_edges, [pub], 0, secondary_limit, secondary_type, test_var)

        pub.group = 0
        new_nodes.append(pub)
        handled_inserted_nodes.append(known_node)

    # ensure, input pubs are declared as group 0
    for new_node in new_nodes:
        if any(new_node.doi_url == inserted_node for inserted_node in inserted_nodes):
            new_node.group = 0

    return (handled_inserted_nodes, new_nodes, new_edges)
def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height,
                              new_search_depth, new_search_height, test_var=False):
    '''
        :param input_nodes_list:    list of nodes which are processed for new construction call
        :type input_nodes_list:     List[Publication]

        :param input_edges_list:    list of links between nodes from input_nodes_list.
        :type input_edges_list:     List[List[String,String]]

        :param inserted_nodes:      list of nodes which are inserted to new input array
        :type inserted_nodes:       List[String]

        :param old_search_depth:    depth to search for references from old construction call
        :type old_search_depth:     int

        :param old_search_height:   height to search for citations from old construction call
        :type old_search_height:    int

        :param new_search_depth:    depth to search for references from new construction call
        :type new_search_depth:     int

        :param new_search_height:   height to search for citations from new construction call
        :type new_search_height:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        completes the references and citations for inserted nodes which were already known in a
        non input group and starts a regular graph construction for all remaining inserted nodes.
        The module-level ``nodes``, ``edges`` and ``error_doi_list`` are reset by this call.

        :return:                    (new node list, new edge list, list of DOIs which could not be processed)
    '''
    global nodes, edges, error_doi_list
    nodes = input_nodes_list.copy()
    edges = input_edges_list.copy()
    error_doi_list = []

    handled_inserted_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_nodes, old_search_depth,
                                                                                old_search_height, new_search_depth,
                                                                                new_search_height, test_var)

    # handled_inserted_nodes holds node objects while inserted_nodes holds DOI strings,
    # so the handled entries are additionally compared by their DOI
    handled_dois = [getattr(handled, "doi_url", handled) for handled in handled_inserted_nodes]
    not_handled_inserted_nodes = [inserted for inserted in inserted_nodes
                                  if inserted not in handled_inserted_nodes and inserted not in handled_dois]

    # function call to begin recursive processing up to max depth/height for unhandled nodes
    if not_handled_inserted_nodes:
        new_nodes, new_edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes,
                                                                           new_search_depth, new_search_height,
                                                                           test_var=test_var, update_var=True,
                                                                           input_nodes=new_nodes,
                                                                           input_edges=new_edges)
        # merge the errors of the construction call without duplicates
        for err_node in error_doi_list_new:
            if err_node not in error_doi_list:
                error_doi_list.append(err_node)

    return (new_nodes, new_edges, error_doi_list)