Skip to content
Snippets Groups Projects
Select Git revision
  • a0ed20225f4d2e828d32953052ebfc12c5d0f760
  • main default protected
2 results

add_citations_rec.py

Blame
  • Forked from Ockenden, Samuel / CiS Projekt
    Source project has a limited visibility.
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    add_citations_rec.py 7.22 KiB
    # -*- coding: utf-8 -*-
    """
    Functions to add citations recursively for multiple ACS/Nature journals
    
    """
    
    __authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
    __email__ = "cis-project2021@zbh.uni-hamburg.de"
    __status__ = "Production"
    #__copyright__ = ""
    #__credits__ = ["", "", "", ""]
    #__license__ = ""
    #__version__ = ""
    #__maintainer__ = ""
    
    
    import sys  
    from pathlib import Path
    from os import error
    sys.path.append("../")
    
    from input.publication import Publication
    from verarbeitung.get_pub_from_input import get_pub
    
    def get_cit_type_list(pub, cit_type):
        '''
            :param pub:                 Publication whose citation or reference list is requested
            :type pub:                  Publication

            :param cit_type:            selects the list to return, either "Citation" or "Reference"
            :type cit_type:             String

            :return:                    pub.citations for "Citation", pub.references for "Reference"

            :raises ValueError:         if cit_type is neither "Citation" nor "Reference"

            function to pick the citation or the reference list of a publication
        '''
        if cit_type == "Citation":
            return pub.citations
        if cit_type == "Reference":
            return pub.references

        # Raise the error instead of returning the exception class: callers
        # iterate over the result, and a returned class would only surface
        # later as an unrelated "object is not iterable" TypeError.
        raise ValueError("unknown cit_type: {!r}".format(cit_type))
    
    def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
        '''
            :param input_nodes:         list of nodes from Processing
            :type input_nodes:          List[Publication]

            :param input_edges:         list of edges from Processing
            :type input_edges:          List[String, String]

            :param pub:                 Publication which citations will be added
            :type pub:                  Publication

            :param search_depth:        current depth to search for citations
            :type search_depth:         int

            :param search_depth_max:    maximum depth to search for citations
            :type search_depth_max:     int

            :param cit_type:            variable to differentiate citation and reference call
            :type cit_type:             String

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            :return:                    whatever create_graph_structure_citations returns for pub

            binds the module-level nodes and edges to the given lists and
            delegates to create_graph_structure_citations for a single publication
        '''
        global nodes, edges

        # The helper works on the module-level lists, so they are rebound to
        # the caller's list objects before delegating.
        nodes, edges = input_nodes, input_edges

        added_publications = create_graph_structure_citations(
            pub, search_depth, search_depth_max, cit_type, test_var
        )
        return added_publications
    
    
    def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
        '''
            :param pub:                 publication whose citations or references will be added
            :type pub:                  Publication

            :param search_depth:        current depth to search for citations
            :type search_depth:         int

            :param search_depth_max:    maximum depth to search for citations
            :type search_depth_max:     int

            :param cit_type:            variable to differentiate citation and reference call
            :type cit_type:             String

            :param test_var:            variable to differentiate between test and url call, passed on to get_pub
            :type test_var:             boolean

            :return:                    publications that were newly appended to nodes by this call
            :rtype:                     List[Publication]

            adds a node for every citing/referenced publication that is not yet in the
            module-level nodes list (only while search_depth < search_depth_max)
            adds the connecting edge to the module-level edges list; for entries that
            are already known only the missing edge is added
        '''

        citations_pub_obj_list = []
        for citation in get_cit_type_list(pub, cit_type):
            # duplicate check: a publication is identified by its doi_url
            already_known = any(citation.doi_url == node.doi_url for node in nodes)

            if already_known:
                # adds just the edge if the citation already exists as a node
                if cit_type == "Citation":
                    edge = [citation.doi_url, pub.doi_url]
                else:
                    edge = [pub.doi_url, citation.doi_url]
                if edge not in edges:
                    edges.append(edge)
                continue

            # unknown publications are only fetched below the maximum depth
            if not (search_depth < search_depth_max):
                continue

            # test_var lets get_pub choose between test input and url input
            citation_pub_obj = get_pub(citation.doi_url, test_var)
            if not isinstance(citation_pub_obj, Publication):
                # get_pub did not deliver a Publication: report the publication
                # that is currently being expanded and skip this entry
                print(pub)
                continue

            if cit_type == "Citation":
                # citing publications get a positive group; edge points citing -> cited
                citation_pub_obj.group = search_depth + 1
                edges.append([citation_pub_obj.doi_url, pub.doi_url])
            else:
                # referenced publications get a negative group; edge points pub -> reference
                citation_pub_obj.group = -(search_depth + 1)
                edges.append([pub.doi_url, citation_pub_obj.doi_url])
            nodes.append(citation_pub_obj)
            citations_pub_obj_list.append(citation_pub_obj)

        return citations_pub_obj_list
    
    
    def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
        '''
            :param citations_pub_obj_list:  list of publications which citations will be added
            :type citations_pub_obj_list:   List[Publication]

            :param search_depth:        current depth to search for citations
            :type search_depth:         int

            :param search_depth_max:    maximum depth to search for citations
            :type search_depth_max:     int

            :param cit_type:            variable to differentiate citation and reference call
            :type cit_type:             String

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            recursive function to implement depth-first-search on citations:
            each publication is expanded and its newly found publications are
            processed completely before the next publication of the list is visited
        '''

        for current_pub in citations_pub_obj_list:
            # expand the current publication by one level of nodes/edges
            discovered = create_graph_structure_citations(
                current_pub, search_depth, search_depth_max, cit_type, test_var
            )

            # stop descending once the maximum depth has been reached
            if not (search_depth < search_depth_max):
                continue

            # otherwise continue recursively one level deeper with the new publications
            process_citations_rec(discovered, search_depth + 1, search_depth_max, cit_type, test_var)
    
    
    def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
        '''
            :param input_nodes:             list of nodes from Processing
            :type input_nodes:              List[Publication]

            :param input_edges:             list of edges from Processing
            :type input_edges:              List[String, String]

            :param citations_pub_obj_list:  list of publications which citations will be added
            :type citations_pub_obj_list:   List[Publication]

            :param search_depth:        current depth to search for citations
            :type search_depth:         int

            :param search_depth_max:    maximum depth to search for citations
            :type search_depth_max:     int

            :param cit_type:            variable to differentiate citation and reference call
            :type cit_type:             String

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            :return:                    None; results are appended to input_nodes and input_edges

            function to call recursive depth-first-search of citations
        '''
        global nodes, edges

        # The recursive helpers append to the module-level lists, so binding
        # them to the caller's lists makes the results visible to the caller.
        nodes, edges = input_nodes, input_edges

        process_citations_rec(
            citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var
        )