Skip to content
Snippets Groups Projects
Select Git revision
  • 8afb0d5a8ce55ec6fc9928b9e148fd8ebe4ed201
  • master default protected
  • est-autem-a-officia-quibusdam-et-dolor
  • dignissimos-libero-alias-distinctio-sequi-mollitia-quia
  • repudiandae-quia-repellat-ipsa-enim-pariatur-quae
  • voluptatibus-ut-earum-fuga-reprehenderit-repudiandae-id
  • doloremque-qui-facere-quo-ea-vel-nostrum
  • quod-expedita-vitae-voluptatum-quo-qui-ipsum
  • aliquam-ratione-assumenda-quos-architecto-tempora-pariatur
  • cupiditate-hic-molestias-facilis-non-qui-praesentium
  • architecto-consequuntur-cupiditate-quo-delectus-similique-sunt
  • eaque-voluptatibus-omnis-labore-aut-qui-possimus
  • dicta-veniam-adipisci-rem-consequatur-ut-delectus
  • beatae-nulla-eum-aliquid-ut-nesciunt-commodi
  • itaque-deserunt-et-quos-non-sit-ut
  • debitis-repellat-tempora-accusantium-quia-ad-nam
  • dignissimos-modi-autem-dolores-fugiat-ipsum-officiis
  • unde-et-enim-aut-aut-dignissimos-atque
  • dolorem-quisquam-mollitia-quia-cum-quam-dolores
  • iure-rem-veritatis-ullam-voluptas-error-ad
  • iste-temporibus-adipisci-error-exercitationem-eaque-omnis
21 results

README.md

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    connect_new_input.py 12.21 KiB
    # -*- coding: utf-8 -*-
    """
    Functions to update a graph representing citations between multiple ACS/Nature journals
    
    """
    
    __authors__ = "Donna Löding, Alina Molkentin, Judith Große, Malte Schokolowski"
    __email__ = "cis-project2021@zbh.uni-hamburg.de"
    __status__ = "Production"
    
    # __copyright__ = ""
    # __credits__ = ["", "", "", ""]
    # __license__ = ""
    # __version__ = ""
    # __maintainer__ = ""
    
    import sys
    from pathlib import Path
    from os import error
    
    sys.path.append("../")
    
    from input.publication import Publication
    from verarbeitung.get_pub_from_input import get_pub
    from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
    from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
    
    
    def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type):
        '''
            :param test_nodes:          node list to install as the module-level node list
            :type test_nodes:           List[Publication]

            :param test_edges:          edge list to install as the module-level edge list
            :type test_edges:           List[List[String,String]]

            :param changed_node:        node handed to find_furthermost_citations as start node
            :type changed_node:         Publication

            :param old_search_depth:    forwarded unchanged to find_furthermost_citations
            :type old_search_depth:     int

            :param new_search_depth:    forwarded unchanged to find_furthermost_citations
            :type new_search_depth:     int

            :param cit_type:            forwarded unchanged to find_furthermost_citations
            :type cit_type:             String

            test entry point: sets the global node/edge lists to the given lists and returns
            the result of find_furthermost_citations called on those same lists.
        '''
        global nodes, edges
        nodes, edges = test_nodes, test_edges

        # the very same list objects serve as global lists and as output lists of the call
        furthermost = find_furthermost_citations(test_nodes, test_edges, changed_node, old_search_depth,
                                                 new_search_depth, cit_type)
        return furthermost
    
    
    def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height,
                                          new_search_depth, new_search_height):
        '''
            :param test_nodes:          node list to install as the module-level node list
            :type test_nodes:           List[Publication]

            :param test_edges:          edge list to install as the module-level edge list
            :type test_edges:           List[List[String,String]]

            :param inserted_test_nodes: list of nodes which are inserted to new input array
            :type inserted_test_nodes:  List[String]

            :param old_search_depth:    depth to search for references from old construction call
            :type old_search_depth:     int

            :param old_search_height:   height to search for citations from old construction call
            :type old_search_height:    int

            :param new_search_depth:    depth to search for references from new construction call
            :type new_search_depth:     int

            :param new_search_height:   height to search for citations from new construction call
            :type new_search_height:    int

            test entry point for complete_changed_group_nodes (always called with test_var=True).
            Returns (new_nodes, new_edges, handled_nodes), i.e. the callee's result in a different order.
        '''
        global nodes, edges, error_doi_list
        nodes = test_nodes
        edges = test_edges

        # complete_changed_group_nodes appends to the module-level error_doi_list whenever a
        # publication cannot be fetched; without initializing it here that append raises a
        # NameError unless connect_old_and_new_input happened to run before.
        error_doi_list = []

        handled_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_test_nodes, old_search_depth,
                                                                           old_search_height, new_search_depth,
                                                                           new_search_height, True)
        return (new_nodes, new_edges, handled_nodes)
    
    
    def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new_search_depth, cit_type):
        '''
            :param new_nodes:           list of nodes which are generated separately from main node list to avoid recursive problems
            :type new_nodes             List[Publication]

            :param new_edges:           list of edges which are generated separately from main edge list to avoid recursive problems
            :type new_edges:            List[List[String,String]]

            :param node:                node which is known but not from input group
            :type node:                 Publication

            :param old_search_depth:    depth/height that was used for the old construction call
            :type old_search_depth:     int

            :param new_search_depth:    depth/height of the new construction call, upper bound for the number of levels walked
            :type new_search_depth:     int

            :param cit_type:            determines whether the function call is for a reference or citation
            :type cit_type:             String

            function to find the furthermost citation/reference for given node which is from the same group and
            adds all found nodes between input node and furthermost citations.
            Walks min(old_search_depth - abs(node.group), new_search_depth) levels through the
            module-level node list, extends new_nodes/new_edges in place and overwrites the group
            of every node it reaches.
            It returns the list of nodes collected on the last level (or [node] if no level is walked).
        '''
        # index the known nodes by DOI once instead of scanning the whole list for every lookup;
        # a list per DOI keeps the original iteration order and possible duplicate entries
        nodes_by_doi = {}
        for known_node in nodes:
            nodes_by_doi.setdefault(known_node.doi_url, []).append(known_node)

        citations_saved = [node]

        # group of node and old search depth/height determines how often the loop needs to be repeated
        for depth in range(min(old_search_depth - abs(node.group), new_search_depth)):
            new_citations = []
            for citation in citations_saved:
                for cit_node in nodes_by_doi.get(citation.doi_url, ()):
                    for new_cit in get_cit_type_list(cit_node, cit_type):
                        for new_cit_node in nodes_by_doi.get(new_cit.doi_url, ()):

                            if cit_type == "Citation":
                                # edge points from the citing node to the cited node
                                back_links = new_cit_node.citations
                                edge = [new_cit_node.doi_url, cit_node.doi_url]
                            elif cit_type == "Reference":
                                # edge points from the referencing node to the referenced node
                                back_links = new_cit_node.references
                                edge = [cit_node.doi_url, new_cit_node.doi_url]
                            else:
                                # unknown type: nothing to do for this pair
                                continue

                            # to find a cycle: the reached node links back to the node we came from,
                            # in that case it is not followed any further
                            is_cycle = any(cit_node.doi_url == back_link.doi_url for back_link in back_links)
                            if not is_cycle:
                                new_citations.append(new_cit_node)

                            # change group accordingly and add link to edge
                            # NOTE(review): the same formula is used for references although reference
                            # groups are negative elsewhere in this file - confirm the sign is intended
                            new_cit_node.group = node.group + depth

                            # check for exactly the edge that is appended to avoid duplicates
                            if edge not in new_edges:
                                new_edges.append(edge)

            citations_saved = new_citations
            for new_citation in new_citations:
                if new_citation not in new_nodes:
                    new_nodes.append(new_citation)

        # returns the references/citations which needs to be processed to complete construction
        return citations_saved
    
    
    def _complete_moved_node(node, new_nodes, new_edges, old_limit, new_limit, opposite_limit, cit_type, opposite_type,
                             test_var):
        '''
            :param node:                known node from a non input group which is moved to the input group
            :type node:                 Publication

            :param new_nodes:           list of new nodes, extended in place
            :type new_nodes:            List[Publication]

            :param new_edges:           list of new edges, extended in place
            :type new_edges:            List[List[String,String]]

            :param old_limit:           old search depth/height for the direction the node was found in
            :type old_limit:            int

            :param new_limit:           new search depth/height for the direction the node was found in
            :type new_limit:            int

            :param opposite_limit:      new search height/depth for the opposite direction
            :type opposite_limit:       int

            :param cit_type:            direction the node was found in ("Reference" or "Citation")
            :type cit_type:             String

            :param opposite_type:       the opposite direction ("Citation" or "Reference")
            :type opposite_type:        String

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            fetches the publication for the node, completes its tree in both directions and appends it
            with group 0 to new_nodes. Returns True on success; if no Publication could be fetched the
            doi is appended to the global error_doi_list and False is returned.
        '''
        # get pub from input
        pub = get_pub(node.doi_url, test_var)
        if not isinstance(pub, Publication):
            error_doi_list.append(node.doi_url)
            return False

        # find old maximum publications and complete tree to new max depth/height
        pub.group = node.group
        old_max_citations = find_furthermost_citations(new_nodes, new_edges, pub, old_limit, new_limit, cit_type)
        add_citations(new_nodes, new_edges, old_max_citations, min(old_limit - abs(node.group), new_limit), new_limit,
                      cit_type, test_var)

        # add the complete tree for the opposite direction, starting at the publication itself
        add_citations(new_nodes, new_edges, [pub], 0, opposite_limit, opposite_type, test_var)

        # the publication is an input node from now on
        pub.group = 0
        new_nodes.append(pub)
        return True


    def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth,
                                     new_search_height, test_var):
        '''
            :param inserted_nodes:      list of nodes which are inserted to new input array
            :type inserted_nodes:       List[String]

            :param old_search_depth:    depth to search for references from old construction call
            :type old_search_depth:     int

            :param old_search_height:   height to search for citations from old construction call
            :type old_search_height:    int

            :param new_search_depth:    depth to search for references from new construction call
            :type new_search_depth:     int

            :param new_search_height:   height to search for citations from new construction call
            :type new_search_height:    int

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            completes the references and citations for nodes which were known in non input group.
            Reads the global node list and returns (handled_inserted_nodes, new_nodes, new_edges),
            where handled_inserted_nodes holds the old node objects that were completed here.
        '''
        # saves which nodes were handled because they were known before
        handled_inserted_nodes = []
        new_nodes = []
        new_edges = []

        for node in nodes:
            if node.group < 0:
                # known reference node: complete references first, then add the citation tree
                old_limit, new_limit, opposite_limit = old_search_depth, new_search_depth, new_search_height
                cit_type, opposite_type = "Reference", "Citation"
            elif node.group > 0:
                # known citation node: complete citations first, then add the reference tree
                old_limit, new_limit, opposite_limit = old_search_height, new_search_height, new_search_depth
                cit_type, opposite_type = "Citation", "Reference"
            else:
                # group 0 nodes are not moved
                continue

            # only nodes which are part of the new input are moved to the input group
            if node.doi_url not in inserted_nodes:
                continue

            if _complete_moved_node(node, new_nodes, new_edges, old_limit, new_limit, opposite_limit, cit_type,
                                    opposite_type, test_var):
                handled_inserted_nodes.append(node)

        # ensure, input pubs are declared as group 0
        for new_node in new_nodes:
            if new_node.doi_url in inserted_nodes:
                new_node.group = 0

        return (handled_inserted_nodes, new_nodes, new_edges)
    
    
    def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height,
                                  new_search_depth, new_search_height, test_var=False):
        '''
            :param input_nodes_list:    list of nodes which are processed for new construction call
            :type input_nodes_list:     List[Publication]

            :param input_edges_list:    list of links between nodes from input_nodes_list.
            :type input_edges_list:     List[List[String,String]]

            :param inserted_nodes:      list of nodes which are inserted to new input array
            :type inserted_nodes:       List[String]

            :param old_search_depth:    depth to search for references from old construction call
            :type old_search_depth:     int

            :param old_search_height:   height to search for citations from old construction call
            :type old_search_height:    int

            :param new_search_depth:    depth to search for references from new construction call
            :type new_search_depth:     int

            :param new_search_height:   height to search for citations from new construction call
            :type new_search_height:    int

            :param test_var:            variable to differentiate between test and url call
            :type test_var:             boolean

            completes the references and citations for nodes which were known in non input group and
            starts a graph construction for all inserted nodes which were not known before.
            Resets the global node, edge and error lists and returns (new_nodes, new_edges, error_doi_list).
        '''
        global nodes, edges, error_doi_list

        # work on shallow copies so the caller's lists are not rebound to module state
        nodes = input_nodes_list.copy()
        edges = input_edges_list.copy()
        error_doi_list = []

        handled_inserted_nodes, new_nodes, new_edges = complete_changed_group_nodes(inserted_nodes, old_search_depth,
                                                                                    old_search_height, new_search_depth,
                                                                                    new_search_height, test_var)

        # inserted_nodes holds doi strings while handled_inserted_nodes holds node objects,
        # so both are compared by doi (plain strings in the handled list are accepted as well)
        handled_dois = {getattr(handled_node, "doi_url", handled_node) for handled_node in handled_inserted_nodes}

        # keep all inserted nodes which were not already completed above
        not_handled_inserted_nodes = [doi for doi in inserted_nodes if doi not in handled_dois]

        # function call to begin recursive processing up to max depth/height for unhandled nodes
        if not_handled_inserted_nodes:
            new_nodes, new_edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes, new_search_depth,
                                                                               new_search_height, test_var=test_var,
                                                                               update_var=True, input_nodes=new_nodes,
                                                                               input_edges=new_edges)
            # merge the errors of both steps without duplicates
            for err_node in error_doi_list_new:
                if err_node not in error_doi_list:
                    error_doi_list.append(err_node)

        return (new_nodes, new_edges, error_doi_list)