# initialize_graph.py
# -*- coding: utf-8 -*-
"""
Functions to generate a graph representing citations between multiple ACS/Nature journals

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""


import sys  
from pathlib import Path
from os import error
sys.path.append("../")

from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit


def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
    '''
        :param doi_input_list:      input list of doi from UI
        :type doi_input_list:       List[String]

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param search_height_max:   maximum height to search for citations
        :type search_height_max:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    tuple (references, citations) holding the objects
                                    produced by create_global_lists_cit for every
                                    input publication
        :rtype:                     Tuple[List, List]

        adds input dois to nodes and retrieves citations and references for input publications

        Works on the module-level lists nodes and edges. A doi whose publication is
        already contained in nodes is removed from doi_input_list (in place).
    '''

    # collect found references and citations
    references_pub_obj_list = []
    citations_pub_obj_list = []

    # Iterate over a snapshot: duplicates are removed from doi_input_list below, and
    # removing from a list while iterating over it silently skips the next entry.
    for pub_doi in list(doi_input_list):
        pub = get_pub(pub_doi, test_var)
        if not isinstance(pub, Publication):
            # get_pub did not hand back a Publication: print what it returned and skip
            print(pub)
            continue

        # checks if a publication with this doi already exists in nodes
        if any(pub.doi_url == node.doi_url for node in nodes):
            doi_input_list.remove(pub_doi)  # deletes the doi duplicate from the input list
        else:
            nodes.append(pub)  # appends Publication object
            pub.group = 0      # input publications get group 0

        # inserts references as publication objects into list and
        # inserts first depth references into nodes/edges if maximum search depth > 0
        references_pub_obj_list.extend(
            create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var)
        )

        # inserts citations as publication objects into list and
        # inserts first height citations into nodes if maximum search height > 0
        citations_pub_obj_list.extend(
            create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var)
        )

    return (references_pub_obj_list, citations_pub_obj_list)
        


def complete_inner_edges():
    '''
        completes inner edges between nodes of group height and depth

        Works on the module-level lists nodes and edges:
        - for every node with group < 0, each of its citations whose doi_url belongs
          to a node gets the edge [citation.doi_url, node.doi_url]
        - for every node with group > 0, each of its references whose doi_url belongs
          to a node gets the edge [node.doi_url, reference.doi_url]
        Edges that already exist are not added a second time.
    '''

    # Set lookups replace the former scans over nodes and edges for every candidate.
    # NOTE(review): assumes every entry of edges is a flat sequence of hashable dois.
    known_dois = {pub.doi_url for pub in nodes}
    present_edges = {tuple(edge) for edge in edges}

    def _add_edge(source_doi, target_doi):
        # appends the edge only if both ends are known and it does not exist yet
        key = (source_doi, target_doi)
        if key not in present_edges:
            present_edges.add(key)
            edges.append([source_doi, target_doi])

    for node in nodes:
        if node.group < 0:
            for citation in node.citations:
                if citation.doi_url in known_dois:
                    _add_edge(citation.doi_url, node.doi_url)
        if node.group > 0:
            for reference in node.references:
                if reference.doi_url in known_dois:
                    _add_edge(node.doi_url, reference.doi_url)


def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, update_var = False, input_nodes = None, input_edges = None):
    '''
        :param doi_input_list:  input list of doi from UI
        :type doi_input_list:   List[String]

        :param search_height:   maximum height to search for citations
        :type search_height:    int

        :param search_depth:    maximum depth to search for references
        :type search_depth:     int

        :param test_var:        variable to differentiate between test and url call
        :type test_var:         boolean

        :param update_var:      if True, the graph is built on top of input_nodes and
                                input_edges instead of starting with empty lists
        :type update_var:       boolean

        :param input_nodes:     nodes to start from, only used if update_var is True;
                                the passed list is extended in place
        :type input_nodes:      List or None

        :param input_edges:     edges to start from, only used if update_var is True;
                                the passed list is extended in place
        :type input_edges:      List or None

        :return:                the module-level lists (nodes, edges) after construction
        :rtype:                 Tuple[List, List]

        main function to start graph generation
    '''

    # NOTE: the three checks below only print a message, processing continues afterwards

    # ERROR-Handling doi_array = NULL
    if (len(doi_input_list) == 0):
        print("Error, no input data")

    # ERROR- if a negative number is entered for height
    if (search_height < 0):
        print("Error, search_height of search must be positive")

    # ERROR- if a negative number is entered for depth
    if (search_depth < 0):
        print("Error, search_depth of search must be positive")

    # creates the module-level lists to save nodes and edges
    global nodes, edges
    if update_var:
        # None defaults instead of [] defaults: a shared default list would keep the
        # nodes/edges appended during one call and leak them into the next call
        nodes = input_nodes if input_nodes is not None else []
        edges = input_edges if input_edges is not None else []
    else:
        nodes = []
        edges = []

    # initializes nodes/edges from input and gets a list with publication objects for citations and references returned
    references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list, search_depth, search_height, test_var)

    # function calls to begin recursive processing up to max depth/height
    add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
    add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges()

    # calls a script to save nodes and edges of graph in .json file
    output_to_json(nodes, edges, test_var = test_var)

    return (nodes, edges)