Newer
Older
# -*- coding: utf-8 -*-
"""
Functions to add citations recursively for multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
from os import error
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def get_cit_type_list(pub, cit_type):
    '''
        :param pub:         publication whose citation or reference list is requested
        :type pub:          Publication

        :param cit_type:    "Citation" selects pub.citations, every other value selects pub.references
        :type cit_type:     String

        :return:            pub.citations or pub.references, depending on cit_type

        helper to pick the list of a publication that matches the search direction
    '''
    wants_citations = cit_type == "Citation"
    return pub.citations if wants_citations else pub.references
def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[String, String]

        :param pub:                 publication whose citations/references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    result of create_graph_structure_citations for pub

        binds the module-level nodes and edges to the given lists (the same list
        objects, no copies) and processes a single publication
    '''
    global nodes, edges
    nodes, edges = input_nodes, input_edges

    new_pub_objs = create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var)
    return new_pub_objs
def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param pub:                 publication whose citations/references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for citations
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for citations
        :type search_depth_max:     int

        :param cit_type:            variable to differentiate citation and reference call
        :type cit_type:             String

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly appended to nodes by this call
        :rtype:                     List[Publication]

        works on the module-level lists nodes and edges:
        adds a node and an edge for every entry of the selected list that is not
        yet in nodes, as long as search_depth < search_depth_max
        adds only the (still missing) edge for entries that are already in nodes
    '''
    is_citation = cit_type == "Citation"
    citations_pub_obj_list = []

    for citation in get_cit_type_list(pub, cit_type):
        # checks every citation for duplication (publications are compared by doi_url)
        already_known = any(citation.doi_url == node.doi_url for node in nodes)

        if already_known:
            # adds just the edge if the publication already exists as a node
            if is_citation:
                edge = [citation.doi_url, pub.doi_url]
            else:
                edge = [pub.doi_url, citation.doi_url]
            if edge not in edges:
                edges.append(edge)
            continue

        # unknown publications at or beyond the maximum depth get neither node nor edge
        if not (search_depth < search_depth_max):
            continue

        # test_var is handed to get_pub, which presumably chooses between
        # test input and url input -- confirm in get_pub_from_input
        citation_pub_obj = get_pub(citation.doi_url, test_var)
        if not isinstance(citation_pub_obj, Publication):
            # anything that is not a Publication is skipped
            # NOTE(review): this prints the parent publication, not the
            # citation that could not be loaded -- confirm this is intended
            print(pub)
            continue

        # group stores the distance to the input: positive for citations,
        # negative for references
        if is_citation:
            citation_pub_obj.group = search_depth + 1
            edges.append([citation_pub_obj.doi_url, pub.doi_url])
        else:
            citation_pub_obj.group = -(search_depth + 1)
            edges.append([pub.doi_url, citation_pub_obj.doi_url])
        nodes.append(citation_pub_obj)
        citations_pub_obj_list.append(citation_pub_obj)

    return citations_pub_obj_list
def process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param citations_pub_obj_list:  list of publications whose citations/references will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_depth:            current depth to search for citations
        :type search_depth:             int

        :param search_depth_max:        maximum depth to search for citations
        :type search_depth_max:         int

        :param cit_type:                variable to differentiate citation and reference call
        :type cit_type:                 String

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        recursive function to implement depth-first-search on citations:
        each publication is expanded by one level and, while the maximum depth
        is not reached, its newly added publications are processed right away
    '''
    for current_pub in citations_pub_obj_list:
        # adds the next level of this publication to nodes/edges
        next_level_pubs = create_graph_structure_citations(current_pub, search_depth, search_depth_max, cit_type, test_var)

        # maximum depth reached: nothing to descend into for this publication
        if not (search_depth < search_depth_max):
            continue

        # otherwise calls itself recursively with increased depth
        process_citations_rec(next_level_pubs, search_depth + 1, search_depth_max, cit_type, test_var)
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var):
    '''
        :param input_nodes:             list of nodes from Processing
        :type input_nodes:              List[Publication]

        :param input_edges:             list of edges from Processing
        :type input_edges:              List[String, String]

        :param citations_pub_obj_list:  list of publications whose citations/references will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_depth:            current depth to search for citations
        :type search_depth:             int

        :param search_depth_max:        maximum depth to search for citations
        :type search_depth_max:         int

        :param cit_type:                variable to differentiate citation and reference call
        :type cit_type:                 String

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        binds the module-level nodes and edges to the given lists (the same list
        objects, no copies) and starts the recursive depth-first-search of
        citations; the lists are extended in place, nothing is returned
    '''
    global nodes, edges
    nodes, edges = input_nodes, input_edges

    process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var)