From 2b8fcc8e60ae1c02babd12574c532ad87c68a676 Mon Sep 17 00:00:00 2001 From: Malte Schokolowski <baw8441@uni-hamburg.de> Date: Thu, 23 Dec 2021 15:05:43 +0100 Subject: [PATCH] added return of dois with error --- .../construct_new_graph/add_citations_rec.py | 2 +- .../construct_new_graph/initialize_graph.py | 6 ++++-- verarbeitung/get_pub_from_input.py | 4 +--- verarbeitung/process_main.py | 6 ++++-- verarbeitung/start_script.py | 19 +++++++++++++++++++ .../update_graph/connect_new_input.py | 3 +-- verarbeitung/update_graph/update_graph.py | 6 ++++-- 7 files changed, 34 insertions(+), 12 deletions(-) create mode 100644 verarbeitung/start_script.py diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index 87badf4..95afa35 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -120,7 +120,7 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly citation_pub_obj = get_pub(citation.doi_url, test_var) if (type(citation_pub_obj) != Publication): - print(pub) + #print(pub) continue if (cit_type == "Citation"): diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index bfc7df2..4446453 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -76,6 +76,7 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t pub = get_pub(pub_doi, test_var) if (type(pub) != Publication): #print(pub) + error_doi_list.append(pub_doi) continue # checks if publication already exists in nodes @@ -163,13 +164,14 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va # creates empty lists to save nodes and edges - global nodes, edges + global nodes, edges, error_doi_list if update_var: nodes = input_nodes edges = input_edges else: nodes = [] edges = [] + error_doi_list = [] # initializes nodes/edges from input and gets a list with publication objects for citations and references returned references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) @@ -188,4 +190,4 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va del edges gc.collect() - return(new_nodes,new_edges) + return(new_nodes,new_edges, error_doi_list) diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py index 2766ba8..6bdce3b 100644 --- a/verarbeitung/get_pub_from_input.py +++ b/verarbeitung/get_pub_from_input.py @@ -40,9 +40,7 @@ def get_pub(pub_doi, test_var): else: inter = Input() try: - pub = inter.get_publication(pub_doi) #creates an object of class Publication - except AttributeError: - pub = inter.get_publication(pub_doi) + pub = inter.get_publication(pub_doi) #creates an object of class Publication) except ValueError: return(ValueError) except IndexError: diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py index db17e1f..78df2db 100644 --- a/verarbeitung/process_main.py +++ b/verarbeitung/process_main.py @@ -45,10 +45,12 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso json_file = Path(json_file) if json_file.is_file(): - nodes, edges = update_graph(url_list, json_file, search_depth, search_height) + nodes, edges, error_doi_list = update_graph(url_list, json_file, search_depth, search_height) else: - nodes, edges = init_graph_construction(url_list, search_depth, search_height) + nodes, edges, error_doi_list = init_graph_construction(url_list, search_depth, search_height) # exports graph to given json file name output_to_json(nodes, edges, json_file) + + return error_doi_list \ No newline at end of file diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py new file mode 100644 index 0000000..4901706 --- /dev/null +++ b/verarbeitung/start_script.py @@ -0,0 +1,19 @@ +import sys +import gc +from pathlib import Path +from verarbeitung.process_main import Processing +#from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes + + +doi_list = [] +#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') +#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') +doi_list.append('https://doi.org/10.1021/acs.medchem.0c01332') +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709') +error_list = Processing(doi_list, 2, 2, 'test.json') +print(error_list) + +del doi_list +del error_list +gc.collect() diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index 3560341..5acb05f 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -149,5 +149,4 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes #add_citations(nodes, edges, citations_obj_list, 1, new_search_height, "Citation", test_var) #add_citations(nodes, edges, references_obj_list, 1, new_search_depth, "Reference", test_var) - init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges) - + nodes, edges, error_doi_list = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = nodes, input_edges = edges) diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 7e3ad76..739aa71 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -60,6 +60,7 @@ def get_new_input_dois(new_input, test_var): pub = get_pub(new_node, test_var) if (type(pub) != Publication): #print(pub) + error_doi_list.append(new_node) continue new_input_dois.append(pub.doi_url) @@ -87,9 +88,10 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes old_obj_input_list , old_edges_list = input_from_json(json_file) # one global list to save the process of removing unneeded publications and one to save valid edges - global processed_list, valid_edges + global processed_list, valid_edges, error_doi_list processed_list = old_obj_input_list valid_edges = old_edges_list + error_doi_list = [] # get dois from lists to compare for differences @@ -108,4 +110,4 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes if (len(inserted_nodes) > 0): connect_old_and_new_input(processed_list, valid_edges, inserted_nodes, old_search_depth, old_search_height, search_depth, search_height, test_var) - return(processed_list, valid_edges) + return(processed_list, valid_edges, error_doi_list) -- GitLab