# -*- coding: utf-8 -*-
"""
Functions to add references recursively for multiple ACS/Nature journals

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""


import sys
from pathlib import Path
from os import error
sys.path.append("../../")

from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub


def create_global_lists_ref(input_nodes, input_edges, pub, search_depth, search_depth_max, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[List[String, String]]

        :param pub:                 publication whose references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for references
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added as nodes
        :rtype:                     List[Publication]

        binds the module-level node and edge lists to the given lists and
        calls create_graph_structure_references for a single publication
    '''

    # The given lists are stored by reference, so every append done by the
    # helper below is visible to the caller through input_nodes/input_edges.
    global nodes, edges
    nodes = input_nodes
    edges = input_edges

    return create_graph_structure_references(pub, search_depth, search_depth_max, test_var)


def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
    '''
        :param pub:                 publication whose references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for references
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added as nodes
        :rtype:                     List[Publication]

        adds a node for every referenced publication that is not yet known
        (only while search_depth < search_depth_max) and adds the missing
        edges to references that are already known

        reads and mutates the module-level lists nodes and edges, which have
        to be set beforehand by create_global_lists_ref or add_references
    '''

    references_pub_obj_list = []
    for reference in pub.references:
        # a reference is known as soon as any node carries the same doi url
        already_in_nodes = any(reference == node.doi_url for node in nodes)

        if already_in_nodes:
            # the node exists already, so only the edge may be missing
            if [pub.doi_url, reference] not in edges:
                edges.append([pub.doi_url, reference])
            continue

        # unknown references are only fetched below the depth limit
        if search_depth >= search_depth_max:
            continue

        reference_pub_obj = get_pub(reference, test_var)
        if not isinstance(reference_pub_obj, Publication):
            # get_pub did not deliver a Publication, skip this reference
            # NOTE(review): this prints the citing publication, not the
            # reference that failed - confirm that this is intended
            print(pub)
            continue

        reference_pub_obj.group = "depth"
        nodes.append(reference_pub_obj)
        edges.append([pub.doi_url, reference_pub_obj.doi_url])
        references_pub_obj_list.append(reference_pub_obj)

    return references_pub_obj_list


def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
    '''
        :param references_pub_obj_list: list of publications whose references will be added
        :type references_pub_obj_list:  List[Publication]

        :param search_depth:        current depth to search for references
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    nothing, results are written to nodes/edges
        :rtype:                     None

        recursive function to implement depth-first-search on references
    '''

    for pub in references_pub_obj_list:
        # adds the next level below pub to nodes/edges
        new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var)

        # if the maximum depth has not yet been reached, the newly added
        # publications are processed recursively with increased depth
        if search_depth < search_depth_max:
            process_references_rec(new_reference_pub_obj_list, search_depth + 1, search_depth_max, test_var)


def add_references(input_nodes, input_edges, references_pub_obj_list, search_height, search_height_max, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[List[String, String]]

        :param references_pub_obj_list: list of publications whose references will be added
        :type references_pub_obj_list:  List[Publication]

        :param search_height:       current depth to search for references
        :type search_height:        int

        :param search_height_max:   maximum depth to search for references
        :type search_height_max:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    the node list and the edge list after processing
        :rtype:                     Tuple[List[Publication], List[List[String, String]]]

        function to call recursive depth-first-search of references
    '''

    # The given lists are stored by reference and extended in place by the
    # recursive search.
    global nodes, edges
    nodes = input_nodes
    edges = input_edges

    process_references_rec(references_pub_obj_list, search_height, search_height_max, test_var)
    return nodes, edges