Skip to content
Snippets Groups Projects
Commit 4588d4d0 authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

verarbeitung: bug fix in update_graph and connect_new_input

parent 4ee83a0e
Branches
No related tags found
1 merge request!22Added abstracts to json, negative tests and bug fixes
......@@ -46,7 +46,7 @@ def get_cit_type_list(pub, cit_type):
:param cit_type: variable to differentiate citation and reference call
:type cit_type: String
function to create nodes and edges and call create_graph_structure_citations
function to return citation or reference list for given pub
'''
if cit_type == "Citation":
return(pub.citations)
......
......@@ -6,13 +6,16 @@ from verarbeitung.process_main import Processing
doi_list = []
doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249')
#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203')
#doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709')
#doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728')
error_list = Processing(doi_list, 2, 2, 'test.json')
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.chemrestox.0c00006')#
doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728')
doi_list.append('https://doi.org/10.1021/acs.jpclett.1c03335 ')
error_list = Processing(doi_list, 2, 2, 'test728.json')
print(error_list)
del doi_list
......
......@@ -19,16 +19,19 @@ from os import error
sys.path.append("../")
from input.publication import Publication
from verarbeitung.get_pub_from_input import get_pub
from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction
from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit
def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, cit_type):
def find_furthermost_citations_test(test_nodes, test_edges, changed_node, old_search_depth, new_search_depth, cit_type):
global nodes, edges
nodes = test_nodes
edges = test_edges
return(find_furthermost_citations(nodes, edges, changed_node, old_search_depth, cit_type))
return(find_furthermost_citations(nodes, edges, changed_node, old_search_depth, new_search_depth, cit_type))
def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_nodes, old_search_depth, old_search_height, new_search_depth, new_search_height):
global nodes, edges
......@@ -40,7 +43,7 @@ def complete_changed_group_nodes_test(test_nodes, test_edges, inserted_test_node
def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, cit_type):
def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new_search_depth, cit_type):
'''
:param new_nodes: list of nodes which are generated separately from main node list to avoid recursive problems
:type new_nodes List[Publication]
......@@ -64,7 +67,7 @@ def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, cit
citations_saved = [node]
# group of node and old search depth/height determines how often the loop needs to be repeated
for depth in range(old_search_depth - abs(node.group)):
for depth in range(min(old_search_depth - abs(node.group), new_search_depth)):
new_citations = []
for citation in citations_saved:
for cit_node in nodes:
......@@ -147,36 +150,38 @@ def complete_changed_group_nodes(inserted_nodes, old_search_depth, old_search_he
# moves known reference node to input and completes citations and references for this node
if (node.group < 0) and (node.doi_url in inserted_nodes):
node.group = 0
new_max_citations = find_furthermost_citations(new_nodes, new_edges, node, old_search_height + abs(node.group), "Citation")
add_citations(new_nodes, new_edges, new_max_citations, old_search_height, new_search_height, "Citation", test_var)
pub = get_pub(node.doi_url, test_var)
if (type(pub) != Publication):
new_nodes, new_edges, error_doi_list_ref = init_graph_construction([node.doi_url], new_search_height, 0, test_var, True, new_nodes, new_edges)
error_doi_list.append(node.doi_url)
continue
for err_node in error_doi_list_ref:
if err_node not in error_doi_list:
error_doi_list.append(err_node)
pub.group = node.group
old_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, "Reference")
add_citations(new_nodes, new_edges, old_max_references, min(old_search_depth - abs(node.group), new_search_depth), new_search_depth, "Reference", test_var)
add_citations(new_nodes, new_edges, [pub], 0, new_search_height, "Citation", test_var)
old_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, "Reference")
add_citations(new_nodes, new_edges, old_max_references, old_search_depth, new_search_depth, "Reference", test_var)
pub.group = 0
new_nodes.append(pub)
handled_inserted_nodes.append(node)
# moves known citation node to input and completes citations and references for this node
elif (node.group > 0) and (node.doi_url in inserted_nodes):
node.group = 0
new_max_references = find_furthermost_citations(new_nodes, new_edges, node, old_search_depth + abs(node.group), "Reference")
add_citations(new_nodes, new_edges, new_max_references, old_search_depth, new_search_depth, "Reference", test_var)
#new_nodes.append(new_max_references)
pub = get_pub(node.doi_url, test_var)
if (type(pub) != Publication):
new_nodes, new_edges, error_doi_list_ref = init_graph_construction([node.doi_url], new_search_depth, 0, test_var, True, new_nodes, new_edges)
for err_node in error_doi_list_ref:
if err_node not in error_doi_list:
error_doi_list.append(err_node)
error_doi_list.append(node.doi_url)
continue
pub.group = node.group
old_max_citations = find_furthermost_citations(new_nodes, new_edges, pub, old_search_height, new_search_height, "Citation")
add_citations(new_nodes, new_edges, old_max_citations, min(old_search_height - abs(node.group), new_search_height), new_search_height, "Citation", test_var)
add_citations(new_nodes, new_edges, [pub], 0, new_search_depth, "Reference", test_var)
old_max_citations = find_furthermost_citations(new_nodes, new_edges, node, old_search_height, "Citation")
add_citations(new_nodes, new_edges, old_max_citations, old_search_height, new_search_height, "Citation", test_var)
pub.group = 0
new_nodes.append(pub)
handled_inserted_nodes.append(node)
for new_node in new_nodes:
......
......@@ -101,15 +101,18 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes
# retrieve which publications are already known, removed, inserted
common_nodes, inserted_nodes, deleted_nodes = compare_old_and_new_node_lists(old_doi_input_list, new_doi_input_list)
old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
processed_list_copy = processed_list.copy()
valid_edges_copy = valid_edges.copy()
old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var)
# deletes publications and edges from node_list if publications can no longer be reached
if (len(deleted_nodes) > 0):
processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges)
if (len(inserted_nodes) > 0):
inserted_pub_nodes, inserted_edges, error_doi_list_new = connect_old_and_new_input(processed_list_copy, valid_edges_copy, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var)
for err_node in error_doi_list_new:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment