# -*- coding: utf-8 -*-
"""
Functions to add citations recursively for multiple ACS/Nature journals.

The functions in this module share two module-level lists, ``nodes`` and
``edges``. They are bound by one of the entry points (``add_citations``,
``create_global_lists_cit`` or ``create_graph_structure_citations_test``)
and are mutated in place afterwards, so the caller's lists are updated.
"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""


import sys
from pathlib import Path
from os import error
sys.path.append("../")

from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub


def create_graph_structure_citations_test(pub, search_depth, search_depth_max, cit_type, test_var, test_nodes, test_edges):
    '''
        :param pub:                 publication whose citations/references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            "Citation" or "Reference"
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :param test_nodes:          list of publications from unit test
        :type test_nodes:           List[Publication]

        :param test_edges:          list of links from unit test
        :type test_edges:           List[List[String,String]]

        :return:                    tuple (nodes, edges, list of newly added publications)

        for unit test purposes only
    '''
    global nodes, edges
    nodes = test_nodes
    edges = test_edges
    # nodes/edges are the same list objects that the call below mutates,
    # so the returned tuple reflects the updated graph.
    return (nodes, edges, create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var))


def get_cit_type_list(pub, cit_type):
    '''
        :param pub:         publication whose citations or references are requested
        :type pub:          Publication

        :param cit_type:    variable to differentiate citation and reference call
        :type cit_type:     String

        :return:            pub.citations for "Citation", pub.references for "Reference"
        :raises ValueError: if cit_type is neither "Citation" nor "Reference"

        function to select the citation or reference list of a publication
    '''
    if cit_type == "Citation":
        return pub.citations
    if cit_type == "Reference":
        return pub.references
    # The exception is raised (not returned): returning the class made callers
    # fail later with an unrelated "'type' object is not iterable" TypeError.
    raise ValueError(
        "cit_type must be 'Citation' or 'Reference', got {!r}".format(cit_type)
    )


def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[List[String,String]]

        :param pub:                 publication whose citations will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    list of publications newly added to the nodes

        function to bind the global nodes and edges and call create_graph_structure_citations
    '''
    global nodes, edges
    nodes = input_nodes
    edges = input_edges
    return create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)


def _build_edge(pub_doi_url, other_doi_url, cit_type):
    '''
        :param pub_doi_url:     doi url of the publication being expanded
        :type pub_doi_url:      String

        :param other_doi_url:   doi url of the citing or referenced publication
        :type other_doi_url:    String

        :param cit_type:        "Citation" or anything else (treated as reference)
        :type cit_type:         String

        :return:                [other, pub] for "Citation", otherwise [pub, other]

        private helper to build an edge in the direction used by this module
    '''
    if cit_type == "Citation":
        return [other_doi_url, pub_doi_url]
    return [pub_doi_url, other_doi_url]


def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param pub:                 publication whose citations will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    list of publications newly added to the nodes
        :raises ValueError:         if cit_type is neither "Citation" nor "Reference"

        adds a node for every citing/referenced publication that is still unknown
        adds edges to added citations

        requires the global lists nodes and edges to be bound beforehand
        (see add_citations / create_global_lists_cit)
    '''
    citations_pub_obj_list = []
    for citation in get_cit_type_list(pub, cit_type):
        # checks every citation for duplication
        already_in_nodes = any(citation.doi_url == node.doi_url for node in nodes)

        # adds just the edge if citation already exists
        if already_in_nodes:
            edge = _build_edge(pub.doi_url, citation.doi_url, cit_type)
            if edge not in edges:
                edges.append(edge)
            continue

        # unknown publications beyond the maximum depth are ignored completely
        if search_depth >= search_depth_max:
            continue

        # get_pub checks if its a test and chooses input function accordingly
        citation_pub_obj = get_pub(citation.doi_url, test_var)
        # anything that is not a Publication is skipped
        if not isinstance(citation_pub_obj, Publication):
            continue

        # group is the (1-based) depth: positive for citations, negative for references
        depth = search_depth + 1
        citation_pub_obj.group = depth if cit_type == "Citation" else -depth
        edges.append(_build_edge(pub.doi_url, citation_pub_obj.doi_url, cit_type))
        nodes.append(citation_pub_obj)
        citations_pub_obj_list.append(citation_pub_obj)
    return citations_pub_obj_list


def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param citations_pub_obj_list:  list of publications whose citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_depth:            current depth to search for citations
        :type search_depth:             int

        :param search_depth_max:        maximum depth to search for citations
        :type search_depth_max:         int

        :param cit_type:                variable to differentiate citation and reference call
        :type cit_type:                 String

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        recursive function to implement depth-first-search on citations
    '''
    # adds next level to nodes/edges
    for pub in citations_pub_obj_list:
        new_citation_pub_obj_list = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)

        # If the maximum depth has not yet been reached, calls function recursively with increased depth
        if search_depth < search_depth_max:
            process_citations_rec(new_citation_pub_obj_list, search_depth + 1, search_depth_max, cit_type, test_var)


def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param input_nodes:             list of nodes from Processing
        :type input_nodes:              List[Publication]

        :param input_edges:             list of edges from Processing
        :type input_edges:              List[List[String,String]]

        :param citations_pub_obj_list:  list of publications whose citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_depth:            current depth to search for citations
        :type search_depth:             int

        :param search_depth_max:        maximum depth to search for citations
        :type search_depth_max:         int

        :param cit_type:                variable to differentiate citation and reference call
        :type cit_type:                 String

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        function to call recursive depth-first-search of citations

        returns nothing: input_nodes and input_edges are extended in place
    '''
    global nodes, edges
    nodes = input_nodes
    edges = input_edges

    process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)
    #return(nodes, edges)