Skip to content
Snippets Groups Projects
Commit c2137759 authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

Dateien und Funktionen ausgelagert

parent 79e37e83
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
This commit is part of merge request !11. Comments created here will be created in the context of that merge request.
Showing
with 453 additions and 0 deletions
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
File deleted
......@@ -20,8 +20,10 @@ from os import error
sys.path.append("../")
from input.publication import Publication
from get_pub_from_input import get_pub
from json_demo import output_to_json
from verarbeitung.get_pub_from_input import get_pub
from .export_to_json import output_to_json
from .add_citations_rec import add_citations, create_global_lists_cit
from .add_references_rec import add_references, create_global_lists_ref
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
......@@ -65,12 +67,12 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
# inserts references as publication objects into list and
# inserts first depth references into nodes/edges if maximum search depth > 0
for reference in create_graph_structure_references(pub, 0, search_depth_max, test_var):
for reference in create_global_lists_ref(nodes, edges, pub, 0, search_depth_max, test_var):
references_pub_obj_list.append(reference)
# inserts citations as publication objects into list and
# inserts first height citations into nodes if maximum search height > 0
for citation in create_graph_structure_citations(pub, 0, search_height_max, test_var):
for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, test_var):
citations_pub_obj_list.append(citation)
return(references_pub_obj_list, citations_pub_obj_list)
......@@ -98,147 +100,6 @@ def complete_inner_edges(test_var):
edges.append([node.doi_url,reference])
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
    '''
        :param pub:                 publication whose references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for references
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added as nodes by this call
        :rtype:                     List[Publication]

        adds a node for every referenced publication that is not known yet
        (only while search_depth < search_depth_max) and adds the edges
        [pub.doi_url, reference] to the module-level lists nodes and edges
    '''
    references_pub_obj_list = []
    for reference in pub.references:
        # a reference counts as known as soon as any node carries its doi url
        if any(reference == node.doi_url for node in nodes):
            # known publication: only the edge is added, and only once
            if [pub.doi_url, reference] not in edges:
                edges.append([pub.doi_url, reference])
            continue

        # unknown publications are only fetched while below the depth limit
        if search_depth >= search_depth_max:
            continue

        reference_pub_obj = get_pub(reference, test_var)
        # isinstance instead of an exact type comparison, so that subclasses
        # of Publication are accepted as well
        if not isinstance(reference_pub_obj, Publication):
            # NOTE(review): this prints the citing publication, not the
            # reference that could not be fetched
            print(pub)
            continue

        reference_pub_obj.group = "depth"
        nodes.append(reference_pub_obj)
        edges.append([pub.doi_url, reference_pub_obj.doi_url])
        references_pub_obj_list.append(reference_pub_obj)
    return references_pub_obj_list
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
    '''
        :param references_pub_obj_list: publications whose references will be added
        :type references_pub_obj_list:  List[Publication]

        :param search_depth:            current depth to search for references
        :type search_depth:             int

        :param search_depth_max:        maximum depth to search for references
        :type search_depth_max:         int

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        recursive function: handles every publication of the list and, while
        the depth limit is not reached, directly descends into the references
        that were newly found for it
    '''
    below_limit = search_depth < search_depth_max
    next_depth = search_depth + 1
    for current_pub in references_pub_obj_list:
        # adds the next level of references to nodes/edges
        found = create_graph_structure_references(current_pub, search_depth, search_depth_max, test_var)
        if not below_limit:
            continue
        # limit not reached yet: continue with the newly found publications
        process_references_rec(found, next_depth, search_depth_max, test_var)
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
    '''
        :param pub:                 publication whose citations will be added
        :type pub:                  Publication

        :param search_height:       current height to search for citations
        :type search_height:        int

        :param search_height_max:   maximum height to search for citations
        :type search_height_max:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added as nodes by this call
        :rtype:                     List[Publication]

        adds a node for every citing publication that is not known yet
        (only while search_height < search_height_max) and adds the edges
        [citation, pub.doi_url] to the module-level lists nodes and edges
    '''
    citations_pub_obj_list = []
    for citation in pub.citations:
        # a citation counts as known as soon as any node carries its doi url
        if any(citation == node.doi_url for node in nodes):
            # known publication: only the edge is added, and only once
            if [citation, pub.doi_url] not in edges:
                edges.append([citation, pub.doi_url])
            continue

        # unknown publications are only fetched while below the height limit
        if search_height >= search_height_max:
            continue

        citation_pub_obj = get_pub(citation, test_var)
        # isinstance instead of an exact type comparison, so that subclasses
        # of Publication are accepted as well
        if not isinstance(citation_pub_obj, Publication):
            # NOTE(review): this prints the cited publication, not the
            # citation that could not be fetched
            print(pub)
            continue

        citation_pub_obj.group = "height"
        nodes.append(citation_pub_obj)
        edges.append([citation_pub_obj.doi_url, pub.doi_url])
        citations_pub_obj_list.append(citation_pub_obj)
    return citations_pub_obj_list
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
    '''
        :param citations_pub_obj_list:  publications whose citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_height:           current height to search for citations
        :type search_height:            int

        :param search_height_max:       maximum height to search for citations
        :type search_height_max:        int

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        recursive function: handles every publication of the list and, while
        the height limit is not reached, directly descends into the citations
        that were newly found for it
    '''
    below_limit = search_height < search_height_max
    next_height = search_height + 1
    for current_pub in citations_pub_obj_list:
        # adds the next level of citations to nodes/edges
        found = create_graph_structure_citations(current_pub, search_height, search_height_max, test_var)
        if not below_limit:
            continue
        # limit not reached yet: continue with the newly found publications
        process_citations_rec(found, next_height, search_height_max, test_var)
def process_main(doi_input_list, search_height, search_depth, test_var = False):
'''
:param doi_input_list: input list of doi from UI
......@@ -278,8 +139,8 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False):
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
# function calls to begin recursive processing up to max depth/height
process_citations_rec(citations_obj_list, 1, search_height, test_var)
process_references_rec(references_obj_list, 1, search_depth, test_var)
add_citations(nodes, edges, citations_obj_list, 1, search_height, test_var)
add_references(nodes, edges, references_obj_list, 1, search_depth, test_var)
# adds edges between reference group and citation group of known publications
complete_inner_edges(test_var)
......
# -*- coding: utf-8 -*-
"""
Functions to add citations recursively for multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
# NOTE(review): Path and error are not used in the visible part of this module
from pathlib import Path
from os import error
# extends the module search path by a relative entry so that the packages
# imported below can be found - presumably this requires a matching working
# directory; TODO confirm
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def create_global_lists_cit(input_nodes, input_edges, pub, search_height, search_height_max, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[List[String]]

        :param pub:                 publication whose citations will be added
        :type pub:                  Publication

        :param search_height:       current height to search for citations
        :type search_height:        int

        :param search_height_max:   maximum height to search for citations
        :type search_height_max:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        binds the given lists to the module-level names nodes and edges and
        returns the result of create_graph_structure_citations for pub
    '''
    global nodes, edges
    # the lists are not copied: later appends also change the caller's lists
    nodes, edges = input_nodes, input_edges

    new_citations = create_graph_structure_citations(pub, search_height, search_height_max, test_var)
    return new_citations
def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
    '''
        :param pub:                 publication whose citations will be added
        :type pub:                  Publication

        :param search_height:       current height to search for citations
        :type search_height:        int

        :param search_height_max:   maximum height to search for citations
        :type search_height_max:    int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added as nodes by this call
        :rtype:                     List[Publication]

        adds a node for every citing publication that is not known yet
        (only while search_height < search_height_max) and adds the edges
        [citation, pub.doi_url] to the module-level lists nodes and edges
    '''
    citations_pub_obj_list = []
    for citation in pub.citations:
        # a citation counts as known as soon as any node carries its doi url
        if any(citation == node.doi_url for node in nodes):
            # known publication: only the edge is added, and only once
            if [citation, pub.doi_url] not in edges:
                edges.append([citation, pub.doi_url])
            continue

        # unknown publications are only fetched while below the height limit
        if search_height >= search_height_max:
            continue

        citation_pub_obj = get_pub(citation, test_var)
        # isinstance instead of an exact type comparison, so that subclasses
        # of Publication are accepted as well
        if not isinstance(citation_pub_obj, Publication):
            # NOTE(review): this prints the cited publication, not the
            # citation that could not be fetched
            print(pub)
            continue

        citation_pub_obj.group = "height"
        nodes.append(citation_pub_obj)
        edges.append([citation_pub_obj.doi_url, pub.doi_url])
        citations_pub_obj_list.append(citation_pub_obj)
    return citations_pub_obj_list
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
    '''
        :param citations_pub_obj_list:  publications whose citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_height:           current height to search for citations
        :type search_height:            int

        :param search_height_max:       maximum height to search for citations
        :type search_height_max:        int

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        recursive function: handles every publication of the list and, while
        the height limit is not reached, directly descends into the citations
        that were newly found for it
    '''
    below_limit = search_height < search_height_max
    next_height = search_height + 1
    for current_pub in citations_pub_obj_list:
        # adds the next level of citations to nodes/edges
        found = create_graph_structure_citations(current_pub, search_height, search_height_max, test_var)
        if not below_limit:
            continue
        # limit not reached yet: continue with the newly found publications
        process_citations_rec(found, next_height, search_height_max, test_var)
def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_height, search_height_max, test_var):
    '''
        :param input_nodes:             list of nodes from Processing
        :type input_nodes:              List[Publication]

        :param input_edges:             list of edges from Processing
        :type input_edges:              List[List[String]]

        :param citations_pub_obj_list:  publications whose citations will be added
        :type citations_pub_obj_list:   List[Publication]

        :param search_height:           current height to search for citations
        :type search_height:            int

        :param search_height_max:       maximum height to search for citations
        :type search_height_max:        int

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        :return:                        the node list and the edge list after processing
        :rtype:                         Tuple[List[Publication], List[List[String]]]

        binds the given lists to the module-level names nodes and edges and
        starts the recursive processing of the citations
    '''
    global nodes, edges
    # the lists are not copied: the recursion appends to the caller's lists
    nodes, edges = input_nodes, input_edges

    process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var)
    result = (nodes, edges)
    return result
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to add references recursively for multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
# NOTE(review): Path and error are not used in the visible part of this module
from pathlib import Path
from os import error
# extends the module search path by a relative entry so that the packages
# imported below can be found - presumably this requires a matching working
# directory; TODO confirm
sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
def create_global_lists_ref(input_nodes, input_edges, pub, search_depth, search_depth_max, test_var):
    '''
        :param input_nodes:         list of nodes from Processing
        :type input_nodes:          List[Publication]

        :param input_edges:         list of edges from Processing
        :type input_edges:          List[List[String]]

        :param pub:                 publication whose references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for references
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        binds the given lists to the module-level names nodes and edges and
        returns the result of create_graph_structure_references for pub
    '''
    global nodes, edges
    # the lists are not copied: later appends also change the caller's lists
    nodes, edges = input_nodes, input_edges

    new_references = create_graph_structure_references(pub, search_depth, search_depth_max, test_var)
    return new_references
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
    '''
        :param pub:                 publication whose references will be added
        :type pub:                  Publication

        :param search_depth:        current depth to search for references
        :type search_depth:         int

        :param search_depth_max:    maximum depth to search for references
        :type search_depth_max:     int

        :param test_var:            variable to differentiate between test and url call
        :type test_var:             boolean

        :return:                    publications that were newly added as nodes by this call
        :rtype:                     List[Publication]

        adds a node for every referenced publication that is not known yet
        (only while search_depth < search_depth_max) and adds the edges
        [pub.doi_url, reference] to the module-level lists nodes and edges
    '''
    references_pub_obj_list = []
    for reference in pub.references:
        # a reference counts as known as soon as any node carries its doi url
        if any(reference == node.doi_url for node in nodes):
            # known publication: only the edge is added, and only once
            if [pub.doi_url, reference] not in edges:
                edges.append([pub.doi_url, reference])
            continue

        # unknown publications are only fetched while below the depth limit
        if search_depth >= search_depth_max:
            continue

        reference_pub_obj = get_pub(reference, test_var)
        # isinstance instead of an exact type comparison, so that subclasses
        # of Publication are accepted as well
        if not isinstance(reference_pub_obj, Publication):
            # NOTE(review): this prints the citing publication, not the
            # reference that could not be fetched
            print(pub)
            continue

        reference_pub_obj.group = "depth"
        nodes.append(reference_pub_obj)
        edges.append([pub.doi_url, reference_pub_obj.doi_url])
        references_pub_obj_list.append(reference_pub_obj)
    return references_pub_obj_list
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
    '''
        :param references_pub_obj_list: publications whose references will be added
        :type references_pub_obj_list:  List[Publication]

        :param search_depth:            current depth to search for references
        :type search_depth:             int

        :param search_depth_max:        maximum depth to search for references
        :type search_depth_max:         int

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        recursive function: handles every publication of the list and, while
        the depth limit is not reached, directly descends into the references
        that were newly found for it
    '''
    below_limit = search_depth < search_depth_max
    next_depth = search_depth + 1
    for current_pub in references_pub_obj_list:
        # adds the next level of references to nodes/edges
        found = create_graph_structure_references(current_pub, search_depth, search_depth_max, test_var)
        if not below_limit:
            continue
        # limit not reached yet: continue with the newly found publications
        process_references_rec(found, next_depth, search_depth_max, test_var)
def add_references(input_nodes, input_edges, references_pub_obj_list, search_height, search_height_max, test_var):
    '''
        :param input_nodes:             list of nodes from Processing
        :type input_nodes:              List[Publication]

        :param input_edges:             list of edges from Processing
        :type input_edges:              List[List[String]]

        :param references_pub_obj_list: publications whose references will be added
        :type references_pub_obj_list:  List[Publication]

        :param search_height:           current level of the search; handed on to
                                        process_references_rec as its search_depth
        :type search_height:            int

        :param search_height_max:       maximum level of the search; handed on to
                                        process_references_rec as its search_depth_max
        :type search_height_max:        int

        :param test_var:                variable to differentiate between test and url call
        :type test_var:                 boolean

        :return:                        the node list and the edge list after processing
        :rtype:                         Tuple[List[Publication], List[List[String]]]

        binds the given lists to the module-level names nodes and edges and
        starts the recursive processing of the references
    '''
    global nodes, edges
    # the lists are not copied: the recursion appends to the caller's lists
    nodes, edges = input_nodes, input_edges

    process_references_rec(references_pub_obj_list, search_height, search_height_max, test_var)
    result = (nodes, edges)
    return result
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment