Select Git revision
add_references_rec.py
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
add_references_rec.py 6.02 KiB
# -*- coding: utf-8 -*-
"""
Functions to add references recursively for multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def create_global_lists_ref(input_nodes, input_edges, pub, search_depth, search_depth_max, test_var):
    '''
    Binds the module-level node and edge lists to the given lists and then
    processes the references of a single publication.

    :param input_nodes: list of nodes from Processing
    :type input_nodes: List[Publication]

    :param input_edges: list of edges from Processing
    :type input_edges: List[String, String]

    :param pub: publication whose references will be added
    :type pub: Publication

    :param search_depth: current depth to search for references
    :type search_depth: int

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :return: whatever create_graph_structure_references returns for pub
    '''
    global nodes, edges

    # Rebound, not copied: the module-level names now refer to the very
    # list objects handed in by the caller.
    nodes, edges = input_nodes, input_edges

    added_references = create_graph_structure_references(pub, search_depth, search_depth_max, test_var)
    return added_references
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
    '''
    Adds a node for every referenced publication that is not yet known and
    adds the edges from pub to its references.

    Works on the module-level lists ``nodes`` and ``edges``, which must have
    been bound before this function is called.

    :param pub: publication whose references will be added
    :type pub: Publication

    :param search_depth: current depth to search for references
    :type search_depth: int

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :return: the publications that were newly appended to nodes in this call
    :rtype: List[Publication]
    '''
    references_pub_obj_list = []

    for reference in pub.references:
        # nodes is scanned afresh for every reference, so nodes appended
        # earlier in this loop are taken into account and no duplicates arise
        already_known = any(reference == node.doi_url for node in nodes)

        if already_known:
            # the referenced node exists: only the edge may still be missing
            edge = [pub.doi_url, reference]
            if edge not in edges:
                edges.append(edge)
            continue

        # unknown references are only fetched while below the depth limit
        if search_depth >= search_depth_max:
            continue

        reference_pub_obj = get_pub(reference, test_var)
        if not isinstance(reference_pub_obj, Publication):
            # get_pub did not deliver a Publication: skip this reference
            # NOTE(review): this prints the citing publication, not the
            # reference that could not be resolved - confirm this is intended
            print(pub)
            continue

        reference_pub_obj.group = "depth"
        nodes.append(reference_pub_obj)
        edges.append([pub.doi_url, reference_pub_obj.doi_url])
        references_pub_obj_list.append(reference_pub_obj)

    return references_pub_obj_list
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
    '''
    Recursive function implementing a depth-first search on references.

    :param references_pub_obj_list: list of publications whose references will be added
    :type references_pub_obj_list: List[Publication]

    :param search_depth: current depth to search for references
    :type search_depth: int

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean
    '''
    for publication in references_pub_obj_list:
        # handle the references of this publication (next level of the graph)
        newly_added = create_graph_structure_references(publication, search_depth, search_depth_max, test_var)

        # maximum depth reached: do not descend below this publication
        if not (search_depth < search_depth_max):
            continue

        # descend one level deeper into the publications just added
        process_references_rec(newly_added, search_depth + 1, search_depth_max, test_var)
def add_references(input_nodes, input_edges, references_pub_obj_list, search_height, search_height_max, test_var):
    '''
    Entry point that binds the module-level node and edge lists and starts
    the recursive depth-first search on references.

    :param input_nodes: list of nodes from Processing
    :type input_nodes: List[Publication]

    :param input_edges: list of edges from Processing
    :type input_edges: List[String, String]

    :param references_pub_obj_list: list of publications whose references will be added
    :type references_pub_obj_list: List[Publication]

    :param search_height: current height to search for references
    :type search_height: int

    :param search_height_max: maximum height to search for references
    :type search_height_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :return: tuple of the module-level nodes and edges lists after the search
    '''
    global nodes, edges

    # Rebound, not copied: the module-level names now refer to the very
    # list objects handed in by the caller.
    nodes, edges = input_nodes, input_edges

    process_references_rec(references_pub_obj_list, search_height, search_height_max, test_var)

    return nodes, edges