# connect_new_input.py
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

import sys  
from pathlib import Path
from os import error
sys.path.append("../")

from .import_from_json import input_from_json
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit

def find_furthermost_citations(node, old_search_depth, cit_type):
    '''
        :param node:                node which is known but not from input group
        :type node:                 Publication
        :param old_search_depth:    depth to search for references from old construction call
        :type old_search_depth:     int
        :param cit_type:            determines whether the function call is for a reference or citation
        :type cit_type:             String
        :return:                    the nodes reached after the last expansion level; if no level is
                                    expanded, the unmodified result of get_cit_type_list(node, cit_type)

        function to find the furthermost citation/reference for given node which is from the same group

        Reads the module-level list ``nodes`` (assigned in connect_old_and_new_input).
        Side effect: every node reached during the expansion gets ``node.group`` subtracted
        from its own ``group`` attribute.
    '''
    citations_saved = get_cit_type_list(node, cit_type)

    # group of node and old search depth/height determines how often the loop needs to be repeated
    remaining_levels = old_search_depth - abs(node.group)
    if remaining_levels <= 0:
        return citations_saved

    # Index the known nodes by doi_url once instead of rescanning the whole list for
    # every citation. Each bucket keeps the order (and any duplicates) of ``nodes``,
    # so matches are visited exactly as a linear scan would visit them.
    nodes_by_doi = {}
    for known_node in nodes:
        nodes_by_doi.setdefault(known_node.doi_url, []).append(known_node)

    for _ in range(remaining_levels):
        new_citations = []
        for citation in citations_saved:
            for cit_node in nodes_by_doi.get(citation.doi_url, ()):
                for new_cit in get_cit_type_list(cit_node, cit_type):
                    for new_cit_node in nodes_by_doi.get(new_cit.doi_url, ()):
                        # NOTE(review): a node that is reached over several paths is adjusted
                        # and appended once per path - confirm that this is intended.
                        # node.group is read on every pass on purpose: if ``node`` itself is
                        # reached, its group becomes 0 and later adjustments are no-ops.
                        new_cit_node.group -= node.group
                        new_citations.append(new_cit_node)
        citations_saved = new_citations

    # returns the references/citations which need to be processed to complete construction
    return citations_saved

def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var):
    '''
        :param inserted_nodes:      list of nodes which are inserted to new input array
        :type inserted_nodes:       List[String]
        :param old_search_depth:    depth to search for references from old construction call
        :type old_search_depth:     int
        :param old_search_height:   height to search for citations from old construction call
        :type old_search_height:    int
        :param new_search_depth:    depth to search for references from new construction call
        :type new_search_depth:     int
        :param new_search_height:   height to search for citations from new construction call
        :type new_search_height:    int
        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean
        :return:                    the node objects whose group was reset to 0 by this call

        completes the references and citations for nodes which were known in non input group

        Works on the module-level lists ``nodes`` and ``edges`` (assigned in connect_old_and_new_input).
    '''
    # collects the nodes that were already known and have now been moved to the input group
    completed_nodes = []
    for known_node in nodes:
        if known_node.group < 0:
            # known reference node: citations are built from level 1,
            # references are continued from the old depth
            fresh_type, fresh_limit = "Citation", new_search_height
            resume_type, resume_start, resume_limit = "Reference", old_search_depth, new_search_depth
        elif known_node.group > 0:
            # known citation node: references are built from level 1,
            # citations are continued from the old height
            fresh_type, fresh_limit = "Reference", new_search_depth
            resume_type, resume_start, resume_limit = "Citation", old_search_height, new_search_height
        else:
            # group 0 is neither branch, nothing to complete
            continue

        if known_node.doi_url not in inserted_nodes:
            continue

        # direction which was not followed for this node so far
        fresh_links = create_global_lists_cit(nodes, edges, known_node, 1, fresh_limit, fresh_type, test_var)
        add_citations(nodes, edges, fresh_links, 1, fresh_limit, fresh_type, test_var)

        # direction which was already followed: resume at the furthermost known nodes
        frontier = find_furthermost_citations(known_node, resume_start, resume_type)
        add_citations(nodes, edges, frontier, resume_start, resume_limit, resume_type, test_var)

        known_node.group = 0
        completed_nodes.append(known_node)

    return completed_nodes


def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var = False):
    '''
        :param input_nodes_list:    list of nodes which are processed for new construction call
        :type input_nodes_list:     List[Publication]

        :param input_edges_list:    list of links between nodes from input_nodes_list.
        :type input_edges_list:     List[List[String,String]]

        :param inserted_nodes:      list of nodes which are inserted to new input array
        :type inserted_nodes:       List[String]

        :param old_search_depth:    depth to search for references from old construction call
        :type old_search_depth:     int

        :param old_search_height:   height to search for citations from old construction call
        :type old_search_height:    int

        :param new_search_depth:    depth to search for references from new construction call
        :type new_search_depth:     int

        :param new_search_height:   height to search for citations from new construction call
        :type new_search_height:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        connects newly inserted input nodes with an existing graph: inserted nodes which were
        already known in a non input group are completed by complete_changed_group_nodes, all
        other inserted nodes are passed on to init_graph_construction

        Binds the module-level names ``nodes`` and ``edges`` to the given lists; the
        helper functions of this module work on these globals. Returns nothing.
    '''
    global nodes, edges
    nodes = input_nodes_list
    edges = input_edges_list
    handled_inserted_nodes = complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var)

    # complete_changed_group_nodes returns node objects while inserted_nodes holds DOI
    # strings, so the comparison is done on the DOI explicitly instead of relying on the
    # node class to define equality with strings (plain strings are accepted as well).
    handled_dois = {getattr(handled, "doi_url", handled) for handled in handled_inserted_nodes}

    # keep all entries from inserted_nodes which were not handled above
    not_handled_inserted_nodes = [doi for doi in inserted_nodes if doi not in handled_dois]

    # initializes the remaining inserted nodes on top of the existing nodes/edges
    init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges)