Skip to content
Snippets Groups Projects
delete_nodes_edges.py 3.25 KiB
Newer Older
# -*- coding: utf-8 -*-
"""
Functions to remove publications/links from nodes/edges list, if they can no longer be reached

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

import sys  
from pathlib import Path
sys.path.append("../../")
from .Kanten_Vergleich import back_to_valid_edges
def search_ref_graph_rec(pub):
    '''
    :param pub: publication whose references are followed recursively
    :type pub:  Publication

    function that appends nodes of group "reference" to list usable_nodes, if they are reachable from input nodes

    Relies on the module-level globals usable_nodes and input_obj_list,
    which delete_nodes_and_edges initialises before starting the search.
    Nothing is returned; usable_nodes is extended in place.
    '''
    for reference in pub.references:
        for ref_pub in input_obj_list:
            # only publications known from the input list count, and each one is added once
            if reference.doi_url != ref_pub.doi_url or ref_pub in usable_nodes:
                continue
            usable_nodes.append(ref_pub)

            # to find a cycle and avoid a recursion error: a publication that pub
            # both references and is cited by is kept, but not followed any further
            also_cited = any(reference.doi_url == citation.doi_url
                             for citation in pub.citations)
            if not also_cited:
                search_ref_graph_rec(ref_pub)
def search_cit_graph_rec(pub):
    '''
    :param pub: publication whose citations are followed recursively
    :type pub:  Publication

    function that appends nodes of group "citation" to list usable_nodes, if they are reachable from input nodes

    Relies on the module-level globals usable_nodes and input_obj_list,
    which delete_nodes_and_edges initialises before starting the search.
    Nothing is returned; usable_nodes is extended in place.
    '''
    for citation in pub.citations:
        for cit_pub in input_obj_list:
            # only publications known from the input list count, and each one is added once
            if citation.doi_url != cit_pub.doi_url or cit_pub in usable_nodes:
                continue
            usable_nodes.append(cit_pub)

            # to find a cycle and avoid a recursion error: a publication that pub
            # is cited by and also references is kept, but not followed any further
            also_referenced = any(citation.doi_url == reference.doi_url
                                  for reference in pub.references)
            if not also_referenced:
                search_cit_graph_rec(cit_pub)
def delete_nodes_and_edges(input_list, common_nodes, old_edges_list):
    '''
    :param input_list:      list of publications to get reduced
    :type input_list:       List[Publication]

    :param common_nodes:        list of input dois which are in old and new input call
    :type common_nodes:         List[String]

    :param old_edges_list:      list of links between publications from old call
    :type old_edges_list:       List[List[String,String]]

    :return:    tuple (usable_nodes, valid_edges); usable_nodes are the publications
                reached from the common input nodes, valid_edges is whatever
                back_to_valid_edges returns for old_edges_list and usable_nodes
                (presumably the old links restricted to the usable nodes)

    function to start recursive node removal for references and citations and to change edge list to valid state
    '''
    # the recursive search helpers read and extend these module-level globals
    global usable_nodes, input_obj_list
    usable_nodes = []
    input_obj_list = input_list

    # starts for every common input node a tree-search and adds found nodes to usable_nodes
    for common in common_nodes:
        for pub in input_obj_list:
            if common == pub.doi_url:
                # a common node may already have been reached from an earlier
                # common node; do not list it a second time
                if pub not in usable_nodes:
                    usable_nodes.append(pub)
                search_ref_graph_rec(pub)
                search_cit_graph_rec(pub)

    valid_edges = back_to_valid_edges(old_edges_list, usable_nodes)

    return usable_nodes, valid_edges