Skip to content
Snippets Groups Projects
add_citations_rec.py 7.16 KiB
Newer Older
# -*- coding: utf-8 -*-
"""
Functions to add citations recursively for multiple ACS/Nature journals

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""


import sys  
from pathlib import Path
from os import error
sys.path.append("../")

from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub

def get_cit_type_list(pub, cit_type):
    '''
        :param pub:                 Publication whose citations or references are requested
        :type pub:                  Publication

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :return:                    pub.citations if cit_type equals "Citation",
                                    pub.references for every other value

        function to select the citation or reference list of a publication
    '''
    wants_citations = cit_type == "Citation"
    # only the selected attribute is accessed, the other one is never touched
    return pub.citations if wants_citations else pub.references

def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[String, String]

        :param pub:                 Publication which citations will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    result of create_graph_structure_citations for pub

        function to bind the module-level nodes and edges to the given lists
        and call create_graph_structure_citations
    '''

    # the lists are bound, not copied: anything appended to the globals
    # is appended to input_nodes / input_edges as well
    global nodes, edges
    nodes, edges = input_nodes, input_edges

    added_publications = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
    return added_publications
def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param pub:                 publication which citations will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added to nodes by this call
        :rtype:                     List[Publication]

        adds a node for every citing (or referenced) publication that is not yet
        part of the global nodes list, as long as search_depth < search_depth_max
        adds an edge for every added publication; for publications that are
        already known only the missing edge is added

        edges are stored as [citing doi_url, cited doi_url]
        group of an added node is search_depth + 1 for citations and
        -(search_depth + 1) for references
    '''

    is_citation = (cit_type == "Citation")

    # doi_urls of all known nodes, collected once and kept in sync with nodes
    # below, so the duplicate check does not rescan the node list per citation
    known_doi_urls = {node.doi_url for node in nodes}

    citations_pub_obj_list = []
    for citation in get_cit_type_list(pub, cit_type):

        # adds just the edge if citation already exists
        if citation.doi_url in known_doi_urls:
            if is_citation:
                edge = [citation.doi_url, pub.doi_url]
            else:
                edge = [pub.doi_url, citation.doi_url]
            if edge not in edges:
                edges.append(edge)
            continue

        # unknown publications are only fetched while the maximum depth is not reached
        if not (search_depth < search_depth_max):
            continue

        # test_var is passed on to get_pub (test or url call)
        citation_pub_obj = get_pub(citation.doi_url, test_var)

        # isinstance instead of an exact type comparison, so subclasses of
        # Publication are accepted as well
        if not isinstance(citation_pub_obj, Publication):
            # NOTE(review): this prints the parent publication, not the citation
            # that could not be loaded - confirm whether that is intended
            print(pub)
            continue

        if is_citation:
            citation_pub_obj.group = search_depth + 1
            edges.append([citation_pub_obj.doi_url, pub.doi_url])
        else:
            citation_pub_obj.group = -(search_depth + 1)
            edges.append([pub.doi_url, citation_pub_obj.doi_url])

        nodes.append(citation_pub_obj)
        known_doi_urls.add(citation_pub_obj.doi_url)
        citations_pub_obj_list.append(citation_pub_obj)

    return citations_pub_obj_list


def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param citations_pub_obj_list:  list of publications which citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        recursive function to implement depth-first-search on citations
        every publication of the list is processed completely (including the
        publications found for it) before the next one is started
    '''

    for current_pub in citations_pub_obj_list:
        # adds the next level below current_pub to nodes/edges
        added_pubs = create_graph_structure_citations(current_pub, search_depth, search_depth_max, cit_type, test_var)

        # maximum depth reached: nothing to descend into for this publication
        depth_exhausted = not (search_depth < search_depth_max)
        if depth_exhausted:
            continue

        # descends into the newly added publications with increased depth
        process_citations_rec(added_pubs, search_depth + 1, search_depth_max, cit_type, test_var)
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param input_nodes:             list of nodes from Processing
        :type input_nodes:              List[Publication]

        :param input_edges:             list of edges from Processing
        :type input_edges:              List[String, String]

        :param citations_pub_obj_list:  list of publications which citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        function to call recursive depth-first-search of citations
        binds the module-level nodes and edges to the given lists first
    '''

    # the lists are bound, not copied: the recursion works directly on
    # input_nodes / input_edges
    global nodes, edges
    nodes, edges = input_nodes, input_edges

    # nothing is returned; the extended graph is available to the caller
    # through the lists it passed in
    process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)