Skip to content
Snippets Groups Projects
Select Git revision
  • a2eb55146a8a4f42795b26c475a51069f303d06d
  • main default protected
2 results

add_references_rec.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    add_references_rec.py 6.02 KiB
    # -*- coding: utf-8 -*-
    """
    Functions to add references recursively for multiple ACS/Nature journals
    
    """
    
    __authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
    __email__ = "cis-project2021@zbh.uni-hamburg.de"
    __status__ = "Production"
    #__copyright__ = ""
    #__credits__ = ["", "", "", ""]
    #__license__ = ""
    #__version__ = ""
    #__maintainer__ = ""
    
    
    import sys  
    from pathlib import Path
    from os import error
    sys.path.append("../../")
    
    from input.publication import Publication
    from verarbeitung.get_pub_from_input import get_pub
    
    def create_global_lists_ref(input_nodes, input_edges, pub, search_depth, search_depth_max, test_var):
        '''
            Bind the given node and edge lists to the module-level ``nodes`` and
            ``edges`` and add the references of a single publication to them.

            :param input_nodes:         list of nodes from Processing
            :type input_nodes:          List[Publication]

            :param input_edges:         list of edges from Processing, each a
                                        [citing doi_url, cited doi_url] pair
            :type input_edges:          List[List[String]]

            :param pub:                 publication whose references will be added
            :type pub:                  Publication

            :param search_depth:        current depth to search for references
            :type search_depth:         int

            :param search_depth_max:    maximum depth to search for references
            :type search_depth_max:     int

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            :return:                    the result of create_graph_structure_references
                                        for ``pub`` (the publications newly added as nodes)
        '''

        # The lists are shared with the other functions of this module via globals;
        # they are bound, not copied, so the caller's lists are extended in place.
        global nodes, edges
        nodes, edges = input_nodes, input_edges

        added_references = create_graph_structure_references(pub, search_depth, search_depth_max, test_var)
        return added_references
    
    
    def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
        '''
            Add the references of ``pub`` to the module-level ``nodes`` and ``edges``.

            The module-level lists must have been bound beforehand (this is done by
            create_global_lists_ref and add_references).

            For every reference of ``pub``:
              * if a node with that doi_url already exists, only the edge
                [pub.doi_url, reference] is added (once);
              * otherwise, as long as search_depth < search_depth_max, the
                publication is fetched with get_pub, marked with group "depth",
                appended to ``nodes`` and connected to ``pub`` by a new edge;
              * unknown references at the depth limit are ignored.

            :param pub:                 publication whose references will be added
            :type pub:                  Publication

            :param search_depth:        current depth to search for references
            :type search_depth:         int

            :param search_depth_max:    maximum depth to search for references
            :type search_depth_max:     int

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            :return:                    publications that were newly added as nodes
            :rtype:                     List[Publication]
        '''

        # The depth limit is the same for every reference, so evaluate it once.
        may_expand = search_depth < search_depth_max

        # doi_urls of all known nodes. Kept in sync with ``nodes`` below, so the
        # duplicate check is a set lookup instead of a scan over the whole list.
        known_dois = {node.doi_url for node in nodes}

        references_pub_obj_list = []
        for reference in pub.references:
            if reference in known_dois:
                # node already exists: only record the citation edge, without duplicates
                if [pub.doi_url, reference] not in edges:
                    edges.append([pub.doi_url, reference])
                continue

            if not may_expand:
                # unknown publication, but the recursion limit forbids adding new nodes
                continue

            reference_pub_obj = get_pub(reference, test_var)
            if not isinstance(reference_pub_obj, Publication):
                # lookup did not yield a Publication: report the citing publication and skip
                print(pub)
                continue

            reference_pub_obj.group = "depth"
            nodes.append(reference_pub_obj)
            known_dois.add(reference_pub_obj.doi_url)
            edges.append([pub.doi_url, reference_pub_obj.doi_url])
            references_pub_obj_list.append(reference_pub_obj)

        return references_pub_obj_list
    
    
    def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
        '''
            Recursive depth-first traversal over references.

            Each publication in the list has its references added via
            create_graph_structure_references; while the depth limit is not
            reached, the publications returned by that call are processed in
            turn with the depth increased by one.

            :param references_pub_obj_list: list of publications whose references will be added
            :type references_pub_obj_list:  List[Publication]

            :param search_depth:            current depth to search for references
            :type search_depth:             int

            :param search_depth_max:        maximum depth to search for references
            :type search_depth_max:         int

            :param test_var:                variable to differentiate between test and url call
            :type test_var:                 boolean
        '''

        for current_pub in references_pub_obj_list:
            # add the next level (references of current_pub) to nodes/edges
            next_level = create_graph_structure_references(current_pub, search_depth, search_depth_max, test_var)

            # maximum depth reached: do not descend any further from this publication
            if not (search_depth < search_depth_max):
                continue

            process_references_rec(next_level, search_depth + 1, search_depth_max, test_var)
    
    
    def add_references(input_nodes, input_edges, references_pub_obj_list, search_height, search_height_max, test_var):
        '''
            Entry point for the recursive depth-first search over references.

            Binds the given lists to the module-level ``nodes`` and ``edges``,
            runs process_references_rec on the given publications and returns
            the module-level lists afterwards.

            :param input_nodes:              list of nodes from Processing
            :type input_nodes:               List[Publication]

            :param input_edges:              list of edges from Processing
            :type input_edges:               List[List[String]]

            :param references_pub_obj_list:  list of publications whose references will be added
            :type references_pub_obj_list:   List[Publication]

            :param search_height:            current height to search for references
            :type search_height:             int

            :param search_height_max:        maximum height to search for references
            :type search_height_max:         int

            :param test_var:                 variable to differentiate between test and url call
            :type test_var:                  boolean

            :return:                         tuple (nodes, edges) of the module-level lists
        '''
        # The lists are bound (not copied) to the globals used by the recursive helpers.
        global nodes, edges
        nodes, edges = input_nodes, input_edges

        process_references_rec(references_pub_obj_list, search_height, search_height_max, test_var)

        result = (nodes, edges)
        return result