Skip to content
Snippets Groups Projects
Select Git revision
  • fcb188f29f2c77808eb3c16aa24e6d5fcf04050c
  • main default protected
2 results

Processing.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    Processing.py 5.48 KiB
    # -*- coding: utf-8 -*-
    """
    Functions to generate a graph representing citations between multiple ACS/Nature journals
    
    """
    
    __authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
    __email__ = "cis-project2021@zbh.uni-hamburg.de"
    __status__ = "Production"
    #__copyright__ = ""
    #__credits__ = ["", "", "", ""]
    #__license__ = ""
    #__version__ = ""
    #__maintainer__ = ""
    
    
    import sys  
    from pathlib import Path
    from os import error
    sys.path.append("../")
    
    from input.publication import Publication
    from verarbeitung.get_pub_from_input import get_pub
    from .export_to_json import output_to_json
    from .add_citations_rec import add_citations, create_global_lists_cit
    
    
    def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
        '''
            :param doi_input_list:      input list of doi from UI
            :type doi_input_list:       List[String]
    
            :param search_depth_max:    maximum depth to search for references
            :type search_depth_max:     int
    
            :param search_height_max:   maximum height to search for citations
            :type search_height_max:    int
    
            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean
    
            :return:                    tuple (references, citations) holding everything
                                        create_global_lists_cit returned for the input publications
    
            adds input dois to nodes and retrieves citations and references for input publications
    
            Works on the module-level lists nodes and edges, which process_main creates
            before calling this function. A doi whose publication is already in nodes is
            removed from doi_input_list (the caller's list is modified in place).
        '''
    
        # saves found citations and references in lists
        references_pub_obj_list = []
        citations_pub_obj_list = []
    
        # Iterate over a snapshot of the input: duplicates are removed from doi_input_list
        # below, and removing from a list while looping over it makes the loop silently
        # skip the entry that follows the removed one.
        for pub_doi in list(doi_input_list):
            pub = get_pub(pub_doi, test_var)
            if not isinstance(pub, Publication):
                # get_pub did not deliver a Publication; show what it returned and move on
                print(pub)
                continue
    
            # checks if a publication with the same doi_url already exists in nodes
            already_in_nodes = any(pub.doi_url == node.doi_url for node in nodes)
            if already_in_nodes:
                doi_input_list.remove(pub_doi) # deletes the doi duplicate from the input list
            else:
                nodes.append(pub) # appends Publication object
                pub.group = 0 # input publications form group 0
    
            # NOTE(review): as before, references/citations are also collected for a
            # duplicate publication -- confirm whether duplicates should be skipped entirely.
    
            # inserts references as publication objects into list and
            # inserts first depth references into nodes/edges if maximum search depth > 0
            references_pub_obj_list.extend(
                create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var))
    
            # inserts citations as publication objects into list and
            # inserts first height citations into nodes if maximum search height > 0
            citations_pub_obj_list.extend(
                create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var))
    
        return (references_pub_obj_list, citations_pub_obj_list)
            
    
    
    def complete_inner_edges():
        '''
            completes inner edges between nodes of group height and depth
    
            Works on the module-level lists nodes and edges.
            For a node with a negative group, every entry of node.citations that equals the
            doi_url of some node yields the edge [citation, node.doi_url].
            For a node with a positive group, every entry of node.references that equals the
            doi_url of some node yields the edge [node.doi_url, reference].
            An edge that is already contained in edges is not appended again.
        '''
    
        for current in nodes:
            if current.group < 0:
                for cited_by in current.citations:
                    candidate = [cited_by, current.doi_url]
                    # only connect publications that are part of the graph
                    is_known = any(cited_by == other.doi_url for other in nodes)
                    if is_known and candidate not in edges:
                        edges.append(candidate)
            elif current.group > 0:
                for referenced in current.references:
                    candidate = [current.doi_url, referenced]
                    # only connect publications that are part of the graph
                    is_known = any(referenced == other.doi_url for other in nodes)
                    if is_known and candidate not in edges:
                        edges.append(candidate)
    
    
    def process_main(doi_input_list, search_height, search_depth, test_var = False):
        '''
            :param doi_input_list:  input list of doi from UI
            :type doi_input_list:   List[String]
    
            :param search_height:   maximum height to search for citations
            :type search_height:    int
    
            :param search_depth:    maximum depth to search for references
            :type search_depth:     int
    
            :param test_var:        variable to differentiate between test and url call
            :type test_var:         boolean
    
            :return:                tuple (nodes, edges) of the generated graph
    
            main function to start graph generation
    
            Invalid input (empty doi list, negative height or depth) is only reported
            via print; processing continues afterwards.
        '''
    
        # nodes and edges live at module level so the helper functions can reach them
        global nodes, edges
    
        # reports an empty input list
        if not len(doi_input_list):
            print("Error, no input data")
    
        # reports negative limits, height first and depth second
        for limit, message in (
            (search_height, "Error, search_height of search must be positive"),
            (search_depth, "Error, search_depth of search must be positive"),
        ):
            if limit < 0:
                print(message)
    
        # starts every run with fresh, empty lists for nodes and edges
        nodes, edges = [], []
    
        # fills nodes/edges from the input and collects the publication objects
        # returned for references and citations
        reference_pubs, citation_pubs = initialize_nodes_list(
            doi_input_list, search_depth, search_height, test_var)
    
        # continues processing from level 1 up to max height/depth
        add_citations(nodes, edges, citation_pubs, 1, search_height, "Citation", test_var)
        add_citations(nodes, edges, reference_pubs, 1, search_depth, "Reference", test_var)
    
        # adds edges between reference group and citation group of known publications
        complete_inner_edges()
    
        # hands nodes and edges of the graph to the JSON export
        output_to_json(nodes, edges, test_var)
    
        return (nodes, edges)