# -*- coding: utf-8 -*-
"""
Functions to generate a graph representing citations between multiple ACS/Nature journals
"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

import sys
from pathlib import Path
from os import error

sys.path.append("../")

from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
    '''
    :param doi_input_list: input list of doi from UI
    :type doi_input_list: List[String]

    :param search_depth_max: maximum depth to search for references
    :type search_depth_max: int

    :param search_height_max: maximum height to search for citations
    :type search_height_max: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    adds input dois to nodes and retrieves citations and references for input publications
    '''
    # saves found citations and references in lists
    references_pub_obj_list = []
    citations_pub_obj_list = []

    # iterates over a copy of the input list so duplicates can be removed from the original list while iterating
    for pub_doi in list(doi_input_list):
        pub = get_pub(pub_doi, test_var)
        if not isinstance(pub, Publication):
            print(pub)
            continue

        # checks if publication already exists in nodes
        not_in_nodes = True  # boolean value to check if a node already exists in the set of nodes
        for node in nodes:  # iterates over every node in the set of nodes
            if pub.doi_url == node.doi_url:  # a node with this doi is already in the set
                not_in_nodes = False  # false --> node will not be created
                break
        if not_in_nodes:  # there is no node with this doi in the set
            nodes.append(pub)  # appends Publication object
        else:
            doi_input_list.remove(pub_doi)  # deletes the duplicate doi from the input list

        # inserts references as publication objects into list and
        # inserts first depth references into nodes/edges if maximum search depth > 0
        for reference in create_global_lists_cit(nodes, edges, pub, 0, search_depth_max, "Reference", test_var):
            references_pub_obj_list.append(reference)

        # inserts citations as publication objects into list and
        # inserts first height citations into nodes if maximum search height > 0
        for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, "Citation", test_var):
            citations_pub_obj_list.append(citation)

    return (references_pub_obj_list, citations_pub_obj_list)
def complete_inner_edges():
    '''
    completes inner edges between nodes of group height and depth
    '''
    for node in nodes:
        # connects already known citing publications to the node
        for citation in node.citations:
            for pub in nodes:
                if (pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges):
                    edges.append([citation.doi_url, node.doi_url])
        # connects the node to already known cited publications (references)
        for reference in node.references:
            for pub in nodes:
                if (pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges):
                    edges.append([node.doi_url, reference.doi_url])
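
# Edges are stored as two-element lists of doi urls in the direction [citing_doi, cited_doi];
# e.g. ["https://doi.org/10.1000/a", "https://doi.org/10.1000/b"] means that publication a cites b
# (the two dois here are placeholders for illustration only).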
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, update_var = False, input_nodes = [], input_edges = []):
    '''
    :param doi_input_list: input list of doi from UI
    :type doi_input_list: List[String]

    :param search_height: maximum height to search for citations
    :type search_height: int

    :param search_depth: maximum depth to search for references
    :type search_depth: int

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    :param update_var: variable to differentiate between construction of a new graph and an update of an existing one
    :type update_var: boolean

    :param input_nodes: nodes of an already existing graph (only used if update_var is True)
    :type input_nodes: List[Publication]

    :param input_edges: edges of an already existing graph (only used if update_var is True)
    :type input_edges: List[List[String]]

    main function to start graph generation
    '''
    # ERROR-Handling doi_array = NULL
    if len(doi_input_list) == 0:
        print("Error, no input data")

    # ERROR - if a negative number is entered for height
    if search_height < 0:
        print("Error, search_height of search must be positive")

    # ERROR - if a negative number is entered for depth
    if search_depth < 0:
        print("Error, search_depth of search must be positive")

    # creates empty lists to save nodes and edges, or reuses the given ones when an existing graph is updated
    global nodes, edges
    if update_var:
        nodes = input_nodes
        edges = input_edges
    else:
        nodes = []
        edges = []

    # initializes nodes/edges from input and gets a list with publication objects for citations and references returned
    references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list, search_depth, search_height, test_var)

    # function calls to begin recursive processing up to max depth/height
    add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
    add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)

    # adds edges between reference group and citation group of known publications
    complete_inner_edges()

    # calls a script to save nodes and edges of the graph in a .json file
    # (output_to_json is imported above; its exact parameter list is assumed here)
    output_to_json(nodes, edges, test_var)

    # returns the graph so that calling code (e.g. an update routine) can work with it
    return nodes, edges
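

# Minimal usage sketch: the doi below is only a placeholder, and with test_var = False the call
# fetches publication data from the web via get_pub before building and exporting the graph.
if __name__ == "__main__":
    example_dois = ["https://doi.org/10.1000/example"]  # hypothetical input doi for illustration
    graph_nodes, graph_edges = init_graph_construction(example_dois, search_depth=2, search_height=2)
    print(len(graph_nodes), "nodes and", len(graph_edges), "edges in the generated graph")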