Skip to content
Snippets Groups Projects
Commit 02103a68 authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

new branch and finished connect_new_input

parent 004d7d8a
Branches
No related tags found
1 merge request: !12 "bug fixes and updates to code"
......@@ -96,7 +96,7 @@ def complete_inner_edges():
edges.append([node.doi_url,reference.doi_url])
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False):
def init_graph_construction(doi_input_list, search_depth, search_height, test_var = False, input_nodes = [], input_edges = []):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: List[String]
......@@ -128,8 +128,8 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va
# creates empty lists to save nodes and edges
global nodes, edges
nodes = []
edges = []
nodes = input_nodes
edges = input_edges
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var)
......
......@@ -16,7 +16,7 @@ __status__ = "Production"
import sys
from pathlib import Path
from os import error
from os import error, path
sys.path.append("../")
......@@ -42,13 +42,13 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso
'''
# updates graph if json file is known in directory otherwise starts new graph construction
try:
with open(json_file) as f:
nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
except IOError:
nodes, edges = init_graph_construction(url_list, search_depth, search_height)
json_file = Path(json_file)
if json_file.is_file():
nodes, edges = update_graph(url_list, json_file, search_depth, search_height)
else:
nodes, edges = init_graph_construction(url_list, search_depth, search_height)
# exports graph to given json file name
output_to_json(nodes, edges, json_file)
\ No newline at end of file
......@@ -8,4 +8,5 @@ doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
Processing(doi_list, 3, 3, 'test.json')
\ No newline at end of file
doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
Processing(doi_list, 2, 2, 'test.json')
This diff is collapsed.
......@@ -16,69 +16,138 @@ __status__ = "Production"
import sys
from pathlib import Path
from os import error
sys.path.append("../")
from .import_from_json import input_from_json
from verarbeitung.construct_new_graph.initialize_graph import initialize_nodes_list, complete_inner_edges
from verarbeitung.construct_new_graph.add_citations_rec import add_citations
from verarbeitung.construct_new_graph.export_to_json import output_to_json
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
def connect_old_and_new_input(json_file, new_doi_list, search_depth, search_height, test_var = False):
def find_furthermost_citations(node, old_search_depth, cit_type):
'''
:param json_file: json file with old graph
:type json_file: json file
:param node: node which is known but not from input group
:type node: Publication
:param new_doi_list: additional dois which has to be connected to the old graph
:type new_doi_list: list of strings
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param search_depth: depth to search for references
:type search_depth: int
:param cit_type: determines whether the function call is for a reference or citation
:type cit_type: String
:param search_height: height to search for citations
:type search_height: int
function to find the furthermost citation/reference for given node which is from the same group
'''
citations_saved = get_cit_type_list(node, cit_type)
# group of node and old search depth/height determines how often the loop needs to be repeated
for depth in range(old_search_depth - abs(node.group)):
new_citations = []
for citation in citations_saved:
for cit_node in nodes:
if citation.doi_url == cit_node.doi_url:
for new_cit in get_cit_type_list(cit_node, cit_type):
for new_cit_node in nodes:
if new_cit.doi_url == new_cit_node.doi_url:
new_citations.append(node)
citations_saved = new_citations
# returns the references/citations which needs to be processed to complete contruction
return(citations_saved)
def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var):
'''
:param inserted_nodes: list of nodes which are inserted to new input array
:type inserted_nodes: List[String]
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
connetcs the old graph and the new input dois to a complete new graph
'''
global nodes, edges
nodes = []
edges = []
:param old_search_height: height to search for citations from old construction call
:type old_search_height: int
nodes, edges = input_from_json(json_file)
:param new_search_depth: depth to search for references from new construction call
:type new_search_depth: int
complete_changed_group_nodes(new_doi_list, search_depth, search_height, test_var)
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list,search_depth, search_height, test_var)
:param new_search_height: height to search for citations from new construction call
:type new_search_height: int
# function calls to begin recursive processing up to max depth/height
add_citations(nodes, edges, citations_obj_list, 1, search_height, "Citation", test_var)
add_citations(nodes, edges, references_obj_list, 1, search_depth, "Reference", test_var)
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
# adds edges between reference group and citation group of known publications
complete_inner_edges(test_var)
completes the references and citations for nodes which were known in non input group
'''
#changed_group_node_citations = []
#changed_group_node_references = []
# calls a skript to save nodes and edges of graph in .json file
output_to_json(nodes,edges, test_var)
# saves which nodes were handled because they were known before
handled_inserted_nodes = []
for node in nodes:
return(nodes, edges)
# moves known reference node to input and completes citations and references for this node
if (node.group < 0) and (node.doi_url in inserted_nodes):
node_citations = create_global_lists_cit(nodes, edges, node, 1, new_search_height, "Citation", test_var)
add_citations(nodes, edges, node_citations, 1, new_search_height, "Citation", test_var)
old_max_references = find_furthermost_citations(node, old_search_depth, "Reference")
add_citations(nodes, edges, old_max_references, old_search_depth, new_search_depth, "Reference", test_var)
node.group = 0
handled_inserted_nodes.append(node)
# moves known citation node to input and completes citations and references for this node
elif (node.group > 0) and (node.doi_url in inserted_nodes):
node_references = create_global_lists_cit(nodes, edges, node, 1, new_search_depth, "Reference", test_var)
add_citations(nodes, edges, node_references, 1, new_search_depth, "Reference", test_var)
old_max_citations = find_furthermost_citations(node, old_search_height, "Citation")
add_citations(nodes, edges, old_max_citations, old_search_height, new_search_height, "Citation", test_var)
node.group = 0
handled_inserted_nodes.append(node)
def complete_changed_group_nodes(new_doi_list, search_depth_max, search_height_max, test_var):
return(handled_inserted_nodes)
def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var = False):
'''
work in progress
:param input_nodes_list: list of nodes which are processed for new construction call
:type input_nodes_list: List[Publication]
:param input_edges_list: list of links between nodes from input_nodes_list.
:type input_edges_list: List[List[String,String]]
:param inserted_nodes: list of nodes which are inserted to new input array
:type inserted_nodes: List[String]
:param old_search_depth: depth to search for references from old construction call
:type old_search_depth: int
:param old_search_height: height to search for citations from old construction call
:type old_search_height: int
:param new_search_depth: depth to search for references from new construction call
:type new_search_depth: int
:param new_search_height: height to search for citations from new construction call
:type new_search_height: int
:param test_var: variable to differenciate between test and url call
:type test_var: boolean
completes the references and citations for nodes which were known in non input group
'''
changed_group_node_citations = []
changed_group_node_references = []
global nodes, edges
nodes = input_nodes_list
edges = input_edges_list
for node in nodes:
if (node.group < 0) and (node.doi in new_doi_list):
node.group = "input"
handled_inserted_nodes = complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height, test_var)
elif (node.group > 0) and (node.doi in new_doi_list):
node.group = "input"
# copy all nodes from inserted_nodes to new node, if node is not in handled_inserted_nodes
not_handled_inserted_nodes = [node for node in inserted_nodes if node not in handled_inserted_nodes]
# initializes nodes/edges from input and gets a list with publication objects for citations and references returned
#references_obj_list, citations_obj_list = initialize_nodes_list(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var)
# function calls to begin recursive processing up to max depth/height
#add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var)
#add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var)
init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, input_nodes = nodes, input_edges = edges)
......@@ -133,6 +133,8 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
# adds edges between reference group and citation group of known publications
complete_inner_edges()
return(old_depth, old_height)
# -*- coding: utf-8 -*-
1# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
......@@ -20,6 +20,7 @@ sys.path.append("../../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from .Knoten_Vergleich import doi_listen_vergleichen
from .delete_nodes_edges import delete_nodes_and_edges
from .connect_new_input import connect_old_and_new_input
......@@ -85,7 +86,6 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
# gets information from previous cunstruction call
old_obj_input_list , old_edges_list = input_from_json(json_file)
print(type(old_edges_list[1]))
# one global list to save the process of removing unneeded publications and one to save valid edges
global processed_list, valid_edges
......@@ -104,9 +104,9 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
if (len(deleted_nodes) > 0):
processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list)
update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
if (len(inserted_nodes) > 0):
connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var)
connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var)
return(processed_list, valid_edges)
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment