# -*- coding: utf-8 -*-
"""
Functions to generate a graph representing citations between multiple ACS/Nature journals

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

import sys
import gc
from pathlib import Path
from os import error

sys.path.append("../")

from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit


def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var):
    """
    Reset the module-level graph and run initialize_nodes_list on it.

    For unit test purposes only.

    :param doi_input_list: input list of doi from UI
    :type doi_input_list: List[String]

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param search_height_max: maximum height to search for citations
    :type search_height_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :return: the tuple returned by initialize_nodes_list
    """
    global nodes, edges
    nodes = []
    edges = []

    return initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var)


def complete_inner_edges_test(test_nodes, test_edges):
    """
    Install the given lists as the module-level graph and run complete_inner_edges.

    For unit test purposes only. The passed lists are used directly (not
    copied), so test_edges is extended in place.

    :param test_nodes: list of publications from unit test
    :type test_nodes: List[Publication]

    :param test_edges: list of links from unit test
    :type test_edges: List[List[String,String]]

    :return: tuple (nodes, edges) after the inner edges have been added
    """
    global nodes, edges
    nodes = test_nodes
    edges = test_edges

    complete_inner_edges()
    return (nodes, edges)


def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
    """
    Add the input dois to the module-level nodes list and retrieve citations
    and references for the input publications.

    DOIs whose publication is already present in nodes are removed from
    doi_input_list (the caller's list is modified in place).

    :param doi_input_list: input list of doi from UI
    :type doi_input_list: List[String]

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param search_height_max: maximum height to search for citations
    :type search_height_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :return: tuple (references, citations) of the objects yielded by
        create_global_lists_cit for all processed input publications
    """
    # saves found citations and references in lists
    references_pub_obj_list = []
    citations_pub_obj_list = []

    # Iterate over a snapshot: duplicates are removed from doi_input_list
    # inside the loop, and removing from the list being iterated would make
    # the loop skip the entry that follows each duplicate.
    for pub_doi in list(doi_input_list):
        pub = get_pub(pub_doi, test_var)

        # anything that is not a Publication is reported and skipped
        if not isinstance(pub, Publication):
            print(pub)
            continue

        # checks if a publication with this doi already exists in nodes
        already_known = any(pub.doi_url == node.doi_url for node in nodes)
        if already_known:
            # deletes the doi duplicate from the input list
            doi_input_list.remove(pub_doi)
        else:
            nodes.append(pub)
            pub.group = 0  # input publications form group 0

        # inserts references as publication objects into list and
        # inserts first depth references into nodes/edges if maximum search depth > 0
        references_pub_obj_list.extend(
            create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var)
        )

        # inserts citations as publication objects into list and
        # inserts first height citations into nodes if maximum search height > 0
        citations_pub_obj_list.extend(
            create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var)
        )

    return (references_pub_obj_list, citations_pub_obj_list)


def complete_inner_edges():
    """
    Complete inner edges between nodes of group height and depth.

    Works on the module-level nodes and edges lists. For every node with a
    negative group, an edge [citation.doi_url, node.doi_url] is added for
    each of its citations that is itself a node; for every node with a
    positive group, an edge [node.doi_url, reference.doi_url] is added for
    each of its references that is itself a node. Edges already present are
    not added again. Nodes of group 0 are left untouched.
    """
    # nodes is not modified below, so the set of known dois can be built once
    # instead of scanning the whole node list for every citation/reference
    known_dois = {pub.doi_url for pub in nodes}

    for node in nodes:
        if node.group < 0:
            for citation in node.citations:
                edge = [citation.doi_url, node.doi_url]
                if citation.doi_url in known_dois and edge not in edges:
                    edges.append(edge)
        if node.group > 0:
            for reference in node.references:
                edge = [node.doi_url, reference.doi_url]
                if reference.doi_url in known_dois and edge not in edges:
                    edges.append(edge)


def init_graph_construction(doi_input_list, search_depth, search_height, test_var=False,
                            update_var=False, input_nodes=None, input_edges=None):
    """
    Main function to start graph generation.

    :param doi_input_list: input list of doi from UI
    :type doi_input_list: List[String]

    :param search_depth: maximum depth to search for references
    :type search_depth: int

    :param search_height: maximum height to search for citations
    :type search_height: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :param update_var: variable to check if call is from update_graph with
        known nodes and edges or fresh construction
    :type update_var: boolean

    :param input_nodes: list of publications from update_graph; only used if
        update_var is set, and then extended in place. None means empty list.
    :type input_nodes: List[Publication]

    :param input_edges: list of links from update_graph; only used if
        update_var is set, and then extended in place. None means empty list.
    :type input_edges: List[List[String,String]]

    :return: tuple (nodes, edges) holding shallow copies of the final lists
    """
    # ERROR-Handling doi_array = NULL
    # NOTE: invalid input is only reported; construction still continues.
    if len(doi_input_list) == 0:
        print("Error, no input data")

    # ERROR- if a negative number is entered for height
    if search_height < 0:
        print("Error, search_height of search must be positive")

    # ERROR- if a negative number is entered for depth
    if search_depth < 0:
        print("Error, search_depth of search must be positive")

    # creates empty lists to save nodes and edges
    global nodes, edges
    if update_var:
        # None instead of a mutable default: a shared default list would keep
        # the nodes/edges of earlier calls and leak them into later ones
        nodes = input_nodes if input_nodes is not None else []
        edges = input_edges if input_edges is not None else []
    else:
        nodes = []
        edges = []

    # initializes nodes/edges from input and gets a list with publication
    # objects for citations and references returned
    references_obj_list, citations_obj_list = initialize_nodes_list(
        doi_input_list, search_depth, search_height, test_var
    )

    # function calls to begin recursive processing up to max depth/height
    add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
    add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges()

    # garbage collection to delete nodes and edges lists. Needed because
    # python keeps lists after function end till next function call
    new_nodes = nodes.copy()
    new_edges = edges.copy()
    del nodes
    del edges
    gc.collect()

    return (new_nodes, new_edges)