diff --git a/verarbeitung/Kanten_Vergleich.py b/verarbeitung/Kanten_Vergleich.py index 0b3225d55c008adb328ffc0651b6f72f16bc59c1..da43e21eb2a445b845d9f58fc826f128de524383 100644 --- a/verarbeitung/Kanten_Vergleich.py +++ b/verarbeitung/Kanten_Vergleich.py @@ -9,7 +9,7 @@ def back_to_valid_edges(links_from_json, processed_input_list): function that deletes edges, if one ore two including nodes are deleted nodes ''' - list_of_valid_edges = links_from_json + list_of_valid_edges = links_from_json.copy() #iterates over all edges from old graph @@ -23,6 +23,8 @@ def back_to_valid_edges(links_from_json, processed_input_list): # increases counter if adjacent node was found if (adj_node == pub.doi_url): found_adj_nodes += 1 + if (found_adj_nodes == 2): + break #removes the edge if less than 2 adjacent nodes found if (found_adj_nodes < 2): diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index a3e245a6e337d089ec1fee75c1827a4be0df7ba3..4beb8e85ff1e6f1df02bd557aa574eb0f6330057 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -13,22 +13,21 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" -from bs4 import BeautifulSoup as bs -import requests as req + import sys from pathlib import Path -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +from os import error sys.path.append("../") -from input.interface import InputInterface as Input -#import input -from input_test import input_test_func + +from input.publication import Publication +from get_pub_from_input import get_pub from json_demo import output_to_json def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): ''' :param doi_input_list: input list of doi from UI - :type doi_input_list: list of strings + :type doi_input_list: List[String] :param search_depth_max: maximum depth to search for references :type search_depth_max: int @@ -38,24 +37,19 @@ def initialize_nodes_list(doi_input_list, 
search_depth_max, search_height_max, t :param test_var: variable to differenciate between test and url call :type test_var: boolean + + adds input dois to nodes and retrieves citations and references for input publications ''' + # saves found citations and references in lists references_pub_obj_list = [] citations_pub_obj_list = [] for pub_doi in doi_input_list: #iterates over every incoming doi - if(test_var): #checks that it is a test and chooses test-input function - pub = input_test_func(pub_doi) #creates an object of class Publication - else: #checks that it isnt a test and chooses standart-input function - #print(pub_doi) - inter = Input() - try: - pub = inter.get_publication(pub_doi) #creates an object of class Publication - except ValueError: - continue - except IndexError: - continue - + pub = get_pub(pub_doi, test_var) + if (type(pub) != Publication): + print(pub) + continue # checks if publication already exists in nodes not_in_nodes = True #boolean value to check if a node already exists in the set of nodes @@ -81,34 +75,34 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t return(references_pub_obj_list, citations_pub_obj_list) - + def complete_inner_edges(test_var): ''' :param test_var: variable to differenciate between test and url call :type test_var: boolean + + completes inner edges between nodes of group height and depth ''' for node in nodes: if (node.group == "depth"): for citation in node.citations: for cit in nodes: - if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): - edges.append([citation.doi_url, node.doi_url]) + if (citation == cit.doi_url and [citation, node.doi_url] not in edges): + edges.append([citation, node.doi_url]) if (node.group == "height"): for reference in node.references: for ref in nodes: - if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): - edges.append([node.doi_url,reference.doi_url]) - + if (reference == 
ref.doi_url and [node.doi_url, reference] not in edges): + edges.append([node.doi_url,reference]) -# adds a node for every publication unknown -# adds edges for references between publications + def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): ''' :param pub: publication which references will be added - :type pub: Class Publication + :type pub: Publication :param search_depth: current depth to search for references :type search_depth: int @@ -118,29 +112,24 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ :param test_var: variable to differenciate between test and url call :type test_var: boolean + + adds a node for every referenced publication unknown + adds edges to added references ''' references_pub_obj_list = [] for reference in pub.references: #iterates over the references of the considered paper not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes - for node in nodes: #iterates over all nodes in set of nodes - if (reference.doi_url == node.doi_url): #determines that the node already exists + for node in nodes: #iterates over all nodes in set of nodes # + if (reference == node.doi_url): #determines that the node already exists not_in_nodes = False #boolean false --> node will not be created break if (not_in_nodes): #checks that there is no node with this doi if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit - if (test_var): #determines that it is a test and chooses the test-input function - reference_pub_obj = input_test_func(reference.doi_url) #creates an Object of Publication Class - else: #determines that it isnt a test and chooses the standart function - #reference_pub_obj = Input(reference.doi_url) - inter = Input() - try: - reference_pub_obj = inter.get_publication(reference.doi_url) - except ValueError: - continue - # nur aus Testzwecken, da noch was bei Input falsch ist - except IndexError: 
- continue + reference_pub_obj = get_pub(reference, test_var) + if (type(reference_pub_obj) != Publication): + print(pub) + continue reference_pub_obj.group = "depth" nodes.append(reference_pub_obj) # appends the object to the set of nodes @@ -148,19 +137,15 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references # adds edge only if citation already exists - elif [pub.doi_url,reference.doi_url] not in edges: - edges.append([pub.doi_url,reference.doi_url]) + elif [pub.doi_url,reference] not in edges: + edges.append([pub.doi_url,reference]) return references_pub_obj_list -# recursive function to implement height-first-search on references -# references_pub_obj_list: input list of references as publication objects -# search_depth: current search_depth of height-first-search -# search_depth_max: maximal search_depth for dfs def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): ''' :param references_pub_obj_list: list of publications which references will be added - :type references_pub_obj_list: list of objects of type Class Publications + :type references_pub_obj_list: List[Publication] :param search_depth: current depth to search for references :type search_depth: int @@ -170,6 +155,8 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m :param test_var: variable to differenciate between test and url call :type test_var: boolean + + recursive function to implement height-first-search on references ''' # adds next level to nodes/edges @@ -180,14 +167,12 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m if (search_depth < search_depth_max): process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var) - - def create_graph_structure_citations(pub, search_height, search_height_max, test_var): ''' :param pub: 
publication which citations will be added - :type pub: Class Publication + :type pub: Publication :param search_height: current height to search for citations :type search_height_max: int @@ -197,28 +182,24 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test :param test_var: variable to differenciate between test and url call :type test_var: boolean + + adds a node for every citing publication unknown + adds edges to added citations ''' citations_pub_obj_list = [] for citation in pub.citations: not_in_nodes = True for node in nodes: # checks every citation for duplication - if (citation.doi_url == node.doi_url): + if (citation == node.doi_url): not_in_nodes = False break if (not_in_nodes): if (search_height < search_height_max): #checks if its a test and chooses input function accordingly - if (test_var): - citation_pub_obj = input_test_func(citation.doi_url) - else: - #citation_pub_obj = Input(citation.doi_url) - inter = Input() - try: - citation_pub_obj = inter.get_publication(citation.doi_url) - except ValueError: - continue - except IndexError: - continue + citation_pub_obj = get_pub(citation, test_var) + if (type(citation_pub_obj) != Publication): + print(pub) + continue citation_pub_obj.group = "height" nodes.append(citation_pub_obj) @@ -226,20 +207,15 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test citations_pub_obj_list.append(citation_pub_obj) # adds only edge if citation already exists - elif [citation.doi_url,pub.doi_url] not in edges: - edges.append([citation.doi_url,pub.doi_url]) + elif [citation,pub.doi_url] not in edges: + edges.append([citation,pub.doi_url]) return citations_pub_obj_list - -# recursive function to implement height-first-search on citations -# citations_pub_obj_list: input list of citations as publication objects -# search_height: current search_height of height-first-search -# search_height_max: maximal search_height for dfs def 
process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): ''' :param citations_pub_obj_list: list of publications which citations will be added - :type citations_pub_obj_list: list of objects of type Class Publications + :type citations_pub_obj_list: List[Publication] :param search_height: current height to search for citations :type search_height_max: int @@ -249,6 +225,8 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m :param test_var: variable to differenciate between test and url call :type test_var: boolean + + recursive function to implement depth-first-search on citations ''' # adds next level to nodes/edges @@ -261,8 +239,6 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m - - def process_main(doi_input_list, search_height, search_depth, test_var = False): ''' :param doi_input_list: input list of doi from UI @@ -276,6 +252,8 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): :param test_var: variable to differenciate between test and url call :type test_var: boolean + + main function to start graph generation ''' # ERROR-Handling doi_array = NULL @@ -291,10 +269,10 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): print("Error, search_depth of search must be positive") - + # creates empty lists to save nodes and edges global nodes, edges - nodes = [] # create empty array for the nodes - edges = [] # create empty array for the edges + nodes = [] + edges = [] # initializes nodes/edges from input and gets a list with publication objects for citations and references returned references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) @@ -307,14 +285,6 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): complete_inner_edges(test_var) # calls a skript to save nodes and edges of graph in .json file - 
output_to_json(nodes,edges) - - - # only for unit tests - if (test_var == True): - doi_nodes_list = [] - for node in nodes: - doi_nodes_list.append(node.doi_url) - return(doi_nodes_list, edges) + output_to_json(nodes,edges, test_var) return(nodes,edges) diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/Processing_unittest.py index 772d57204ce3374211d1d1fd3d08d279f085aac3..f0a0a879f71bae5581d85bf79f28183cc11a3442 100644 --- a/verarbeitung/Processing_unittest.py +++ b/verarbeitung/Processing_unittest.py @@ -1,14 +1,22 @@ import unittest + from Processing import process_main +from import_from_json import input_from_json +from update_graph import check_graph_updates class ProcessingTest(unittest.TestCase): + maxDiff = None + + def testCycle(self): nodes, edges = process_main(['doiz1'],1,1,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) nodes, edges = process_main(['doiz1'],2,2,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) #def testBigCycle(self): @@ -19,48 +27,88 @@ class ProcessingTest(unittest.TestCase): def testEmptyDepthHeight(self): nodes, edges = process_main(['doi1'],0,0,True) - self.assertCountEqual(nodes,['doi1']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi1']) self.assertCountEqual(edges, []) nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2']) self.assertCountEqual(edges, [['doi1', 'doi2']]) nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2', 'doi3']) + doi_nodes = 
keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) def testInnerEdges(self): nodes, edges = process_main(['doi_ie1'],1,1,True) - self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) def testRightHeight(self): nodes, edges = process_main(['doi_h01'],1,0,True) - self.assertCountEqual(nodes,['doi_h01']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h01']) self.assertCountEqual(edges, []) nodes, edges = process_main(['doi_h02'],1,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) self.assertCountEqual(edges, [['doi_h1','doi_h02']]) nodes, edges = process_main(['doi_h02'],2,0,True) - self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) def testRightDepth(self): nodes, edges = process_main(['doi_d01'],0,1,True) - self.assertCountEqual(nodes,['doi_d01']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d01']) self.assertCountEqual(edges, []) nodes, edges = process_main(['doi_d02'],0,1,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) self.assertCountEqual(edges, [['doi_d02','doi_d1']]) nodes, edges = process_main(['doi_d02'],0,2,True) - self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2']) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) self.assertCountEqual(edges, 
[['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) + def test_import_from_json(self): + nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True) + nodes_new, edges_new = input_from_json('test_output.json') + self.assertCountEqual(nodes_old,nodes_new) + self.assertCountEqual(edges_old, edges_new) + + def test_deleted_input_dois(self): + nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True) + nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True) + nodes_new_both, edges_new_both = input_from_json('test_output.json') + nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True) + self.assertCountEqual(nodes_old_single,nodes_new_single) + self.assertCountEqual(edges_old_single, edges_new_single) + + + + +def keep_only_dois(nodes): + ''' + :param nodes: input list of nodes of type Publication + :type nodes: List[Publication] + + gets nodes of type pub and return only their doi + ''' + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list + + if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc index f16ff9fc08e6d1b3ae555bd65772c0b66c866779..a86e804167e4c2fdf2e9a1f4b354ef6506381740 100644 Binary files a/verarbeitung/__pycache__/Processing.cpython-39.pyc and b/verarbeitung/__pycache__/Processing.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc index 604973a2f2c133e5085aba44dcabe0ac4fa9ac05..550ce300289531fa2018d232b0f3bbf9986d3cd0 100644 Binary files a/verarbeitung/__pycache__/input_test.cpython-39.pyc and b/verarbeitung/__pycache__/input_test.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index 
20afc4f92dda6846dc720c9f3eb15bed60c69002..3a90cf68398464b1132fc65f1a598bac313a51a3 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and b/verarbeitung/__pycache__/json_demo.cpython-39.pyc differ diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py new file mode 100644 index 0000000000000000000000000000000000000000..a8636a4fcf755943d170abbf84e50fce937a67c2 --- /dev/null +++ b/verarbeitung/get_pub_from_input.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" +A function to return an object of Type Publication for a given doi + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +sys.path.append("../") + +from input.interface import InputInterface as Input +from input_test import input_test_func + + +def get_pub(pub_doi, test_var): + #checks if it's a test and chooses appropiate function + if(test_var): + pub = input_test_func(pub_doi) + + #checks that it isnt a test and chooses standart-input function + else: + inter = Input() + try: + pub = inter.get_publication(pub_doi) #creates an object of class Publication + except ValueError: + return(ValueError) + except IndexError: + return(IndexError) + except AttributeError: + return(AttributeError) + return(pub) \ No newline at end of file diff --git a/verarbeitung/import_from_json.py b/verarbeitung/import_from_json.py index 9fe099f0e794933fdd12dc4b3bf85af290dd19dc..11e5d82f29c7d254422da26fb5e723db24341585 100644 --- a/verarbeitung/import_from_json.py +++ b/verarbeitung/import_from_json.py @@ -15,46 +15,19 @@ __status__ = "Production" import json -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') import sys sys.path.append("../") -from input.interface import 
InputInterface as Input - - -class Publication: - #def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - def __init__(self, doi_url, title, contributors, journal, publication_date, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - self.references = [] - self.citations = [] - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date + +from input.publication import Publication, Citation + def create_pubs_from_json(input_dict): ''' - :param input_dict: dictionary read from old graph Json File - :type json_file: dictionary + :param input_dict: dictionary read from old graph Json File + :type json_file: dictionary + + creates list of publication retrieved from old json file ''' #iterates over the list of nodes @@ -67,29 +40,24 @@ def create_pubs_from_json(input_dict): def add_ref_and_cit_to_pubs(input_dict): ''' - :param input_dict: dictionary read from old graph Json File - :type json_file: dictionary + :param input_dict: dictionary read from old graph Json File + :type json_file: dictionary + + adds references and citations to retrieved publication list ''' # iterates over the list of edges for edge in input_dict["links"]: for source in list_of_nodes_py: for target in list_of_nodes_py: + + # when correct dois found, adds then as references/citatons to publication list if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])): - 
new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date) + new_reference = Citation(target.doi_url, target.title, target.journal, target.contributors, "Reference") source.references.append(new_reference) - new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date) + new_citation = Citation(source.doi_url, source.title, source.journal, source.contributors, "Citation") target.citations.append(new_citation) - # adds reference to current node - # if (node.doi_url == edge["source"]): - # new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) - # node.references.append(new_reference) - - # # adds citation to current node - # if (node.doi_url == edge["target"]): - # new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) - # node.citations.append(new_citation) # adds edge to list list_of_edges_py.append([edge["source"],edge["target"]]) @@ -97,8 +65,10 @@ def add_ref_and_cit_to_pubs(input_dict): def input_from_json(json_file): ''' - :param json_file: Json-Datei for the old graph - :type json_file: Json File + :param json_file: Json-Datei for the old graph + :type json_file: String + + retrieves information from old json file to be reused for new graph construction ''' # creates global sets for nodes and edges @@ -117,5 +87,4 @@ def input_from_json(json_file): add_ref_and_cit_to_pubs(input_dict) - return(list_of_nodes_py, list_of_edges_py) - + return(list_of_nodes_py, list_of_edges_py) \ No newline at end of file diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py index dcd476cbfda3e65a3f2f2680270c038c9891e2e6..7172d579009451fb37a96af14f318887b62a8766 100644 --- a/verarbeitung/input_test.py +++ b/verarbeitung/input_test.py @@ -1,51 +1,31 @@ import sys sys.path.append("../") -from input.publication import Publication - - -class Publication: 
- def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - if references is None: - self.references = [] - else: - self.references = ref(references) - if citations is None: - self.citations = [] - else: - self.citations = cit(citations) - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date +from input.publication import Publication, Citation + def input_test_func(pub_doi): + ''' + :param pub_doi: pub doi to find publication in list_of_arrays + :type pub_doi: String + + returns the publication class for given doi + ''' + for array in list_of_arrays: if pub_doi == array[0]: - pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7], array[8]) + pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) return pub def cit(list_doi): + ''' + :param list_doi list of citation dois to get their Citation Class + :type list_doi: List[String] + + returns a list of citations objects for given doi list + ''' + cits = [] for doi_url in list_doi: for array in list_of_arrays: @@ -54,6 +34,13 @@ def cit(list_doi): return cits def ref(list_doi): + ''' + :param list_doi list of reference dois to get their Reference Class + :type list_doi: List[String] + + returns a list of reference objects for given doi list + ''' + refs = [] for doi_url in list_doi: for 
array in list_of_arrays: @@ -62,27 +49,57 @@ def ref(list_doi): return refs -beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], ['doi3'], ''] -beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], ''] -beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], ''] - -zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], ''] -zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], ''] - -inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], ''] -inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], ''] -inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], ''] - -right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], ''] -right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], ''] -right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], ''] -right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], ''] -right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], ''] - -right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], ''] -right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], ''] -right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], ''] -right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], ''] -right_depth3 = ['doi_d3', 
'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], ''] -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3] +beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] +beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] +beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] + +zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']] +zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']] + +inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] +inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']] +inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []] + +right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []] +right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']] +right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']] +right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']] +right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []] + +right_depth01 = ['doi_d01', 'title_d01', 
['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []] +right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []] +right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []] +right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []] +right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []] + +large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] +large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []] +large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12'], []] +large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']] +large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']] +large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']] +large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']] +large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], 
['doi_lg_1_h12','doi_lg_1_i']] +large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']] +large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']] +large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12']] + +large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] +large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] +large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']] +large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] +large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i'], ['doi_lg_2_h21','doi_lg_2_h22']] +large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] +large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12']] +large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 
'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']] +large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']] +large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']] +large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] +large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, + right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, + large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, + large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, + large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23] diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py index 893945ea3a410b6d04e455862e09935fdab5247e..1ad0002a42ed15e286e88d31df50bdc88a6080e5 100644 --- a/verarbeitung/json_demo.py +++ b/verarbeitung/json_demo.py @@ -12,11 +12,11 @@ def format_nodes(V): list_of_node_dicts = list() for node in V: new_dict = dict() + new_dict["doi"] = node.doi_url new_dict["name"] = node.title new_dict["author"] = node.contributors new_dict["year"] = node.publication_date new_dict["journal"] = node.journal - new_dict["doi"] = node.doi_url 
new_dict["group"] = node.group new_dict["citations"] = len(node.citations) list_of_node_dicts.append(new_dict) @@ -34,14 +34,18 @@ def format_edges(E): return list_of_edge_dicts # combine the lists of nodes and edges to a dictionary and saves it to a json file -def output_to_json(V,E): +def output_to_json(V,E, test_var): dict_of_all = dict() list_of_node_dicts = format_nodes(V) list_of_edge_dicts = format_edges(E) dict_of_all["nodes"] = list_of_node_dicts dict_of_all["links"] = list_of_edge_dicts - with open('json_text.json','w') as outfile: - json.dump(dict_of_all, outfile) + if (test_var): + with open('test_output.json','w') as outfile: + json.dump(dict_of_all, outfile) + else: + with open('json_text.json','w') as outfile: + json.dump(dict_of_all, outfile) #knoten = ["doi1", "doi2", "doi3"] #kanten = [[1,2],[3,4],[5,6]] diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index fbdfcd70ab282dfc115126ad6d74b0777a888c83..de3ddf528cc6224a68946f5b7fab56276120f741 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "doi": "doi_h02", "group": "input", "citations": 1}, {"name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "doi": "doi_h1", "group": "height", "citations": 1}, {"name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "doi": "doi_h2", "group": "height", "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]} \ No newline at end of file +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. 
Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "input", "citations": 5}], "links": []} \ No newline at end of file diff --git a/verarbeitung/json_with_citations_v2.json b/verarbeitung/json_with_citations_v2.json deleted file mode 100644 index 60c61600a3aed91bf9ee13e85382d8f211831737..0000000000000000000000000000000000000000 --- a/verarbeitung/json_with_citations_v2.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00249", "group": "input", "citations": 5}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. 
Rev.", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "group": "height", "citations": 1}, {"name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00741", "group": "height", "citations": 0}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "group": "height", "citations": 8}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00250", "group": "height", "citations": 12}, {"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. 
Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input", "citations": 1}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth", "citations": 14}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth", "citations": 9}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth", "citations": 100}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth", "citations": 48}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth", "citations": 32}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth", "citations": 100}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth", "citations": 100}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth", "citations": 100}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth", "citations": 25}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth", "citations": 12}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth", "citations": 100}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth", "citations": 74}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. 
Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth", "citations": 65}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth", "citations": 97}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth", "citations": 100}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth", "citations": 99}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth", "citations": 100}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. 
Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height", "citations": 0}, {"name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "group": "height", "citations": 0}, {"name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "group": "height", "citations": 0}, {"name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. 
A", "doi": "https://doi.org/10.1021/acs.jpca.1c06264", "group": "height", "citations": 0}, {"name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00226", "group": "height", "citations": 0}, {"name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "group": "height", "citations": 11}, {"name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "doi": "https://doi.org/10.1021/acs.est.9b06379", "group": "height", "citations": 100}, {"name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci049714+", "group": "depth", "citations": 99}, {"name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. 
Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci3001277", "group": "depth", "citations": 100}, {"name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.5b00559", "group": "depth", "citations": 98}, {"name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci7004498", "group": "depth", "citations": 100}, {"name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020155c", "group": "depth", "citations": 100}, {"name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm9602928", "group": "depth", "citations": 100}, {"name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. 
Sci.", "doi": "https://doi.org/10.1021/ci025599w", "group": "depth", "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": 
"https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": 
"https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": 
"https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file diff --git a/verarbeitung/print_graph_test.py b/verarbeitung/print_graph_test.py index 3209485d6d8cd65449c7e151b54616d3cf67568e..efb62ec6985338e1eaf8fee099a4b526bde8d8fc 100644 --- a/verarbeitung/print_graph_test.py +++ b/verarbeitung/print_graph_test.py @@ -51,6 +51,14 @@ def print_extended_graph(nodes, edges): print(len(edges)) print(" ") +def print_simple(nodes, edges): + # for node in nodes: + # print(node) + # for edge in edges: + # print(edge) + print(len(nodes)) + print(len(edges)) + print(" ") # program test with some random dois def try_known_publications(): @@ -79,15 +87,15 @@ def try_known_publications(): def try_delete_nodes(): doi_list = [] doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') nodes, edges = process_main(doi_list,1,1) - print_graph(nodes, edges) + #print_simple(nodes, edges) - list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') - doi_list = [] - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py) - print_graph(valid_nodes, valid_edges) + # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') + # doi_list = [] + # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + # valid_nodes, valid_edges = check_graph_updates(doi_list, 
list_of_nodes_py, list_of_edges_py) + # print_simple(valid_nodes, valid_edges) def try_import(): nodes, edges = input_from_json('json_text.json') diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json new file mode 100644 index 0000000000000000000000000000000000000000..b013fbfb4c1ac100c93ae21dfe722b309c10a14b --- /dev/null +++ b/verarbeitung/test_output.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, 
{"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph.py b/verarbeitung/update_graph.py index 571106805bb5952d7531289f8dbd1854d0792aca..985345630f3388c4cdbcbd888a1e06674509f33a 100644 --- a/verarbeitung/update_graph.py +++ b/verarbeitung/update_graph.py @@ -13,17 +13,27 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" + import sys from pathlib import Path -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +from os import error sys.path.append("../") -from input.interface import InputInterface as Input -from input_test import input_test_func + +from input.publication import Publication +from get_pub_from_input import get_pub from Knoten_Vergleich import doi_listen_vergleichen -from Kanten_Vergleich import back_to_valid_edges +from update_graph_del import 
delete_nodes_and_edges def get_old_input_dois(old_obj_input_list): + ''' + :param old_obj_input_list: list of publications retrieved from old json file + :type old_obj_input_list: List[Publication] + + function to return pub dois for old publications of group input retrieved from json file + ''' + + # new list to save doi_url for each old publication of group input old_input_dois = [] for pub in old_obj_input_list: if (pub.group == "input"): @@ -31,55 +41,65 @@ def get_old_input_dois(old_obj_input_list): return old_input_dois def get_new_input_dois(new_input, test_var): - doi_input_list = [] + ''' + :param new_input: input list of doi from UI + :type new_input: list of strings + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to return pub dois for input urls + ''' + + # new list to save doi_url for each new input url + new_input_dois = [] for new_node in new_input: - if(test_var): - pub = input_test_func(new_node) - else: - #print(pub_doi) - inter = Input() - try: - pub = inter.get_publication(new_node) - except ValueError: - continue - except IndexError: - continue - doi_input_list.append(pub.doi_url) - return doi_input_list - -def delete_ref_nodes_rec(pub):#, old_obj_list): - for reference in pub.references: - for ref_pub in processed_input_list: - if (ref_pub.doi_url == reference.doi_url): - delete_ref_nodes_rec(ref_pub) - if (pub.group != "input"): - processed_input_list.remove(pub) - -def delete_cit_nodes_rec(pub): - for citation in pub.citations: - for cit_pub in processed_input_list: - if (cit_pub.doi_url == citation.doi_url): - delete_cit_nodes_rec(cit_pub) - if (pub.group != "input"): - processed_input_list.remove(pub) + # retrieves information and adds to new list if successful + pub = get_pub(new_node, test_var) + if (type(pub) != Publication): + print(pub) + continue + + new_input_dois.append(pub.doi_url) + return(new_input_dois) def check_graph_updates(new_doi_input_list, 
def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False):
    '''
    :param new_doi_input_list: input list of doi from UI
    :type new_doi_input_list: List[String]

    :param old_obj_input_list: list of publications retrieved from old json file
    :type old_obj_input_list: List[Publication]

    :param old_edges_list: list of links between publications retrieved from old json file
    :type old_edges_list: List[List[String,String]]

    :param test_var: variable to differentiate between test and url call
    :type test_var: boolean

    function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges
    '''

    # one global list to save the process of removing unneeded publications and one to save valid edges
    global processed_input_list, valid_edges
    # .copy() so the removal process does not alter the caller's list
    processed_input_list = old_obj_input_list.copy()
    valid_edges = []

    # BUG FIX: these were initialised to empty lists and only filled when nodes
    # were deleted, so a call without deletions returned an empty graph and
    # discarded every node/edge; default to the unchanged old state instead
    processed_input_list_del = processed_input_list
    valid_edges_del = old_edges_list.copy()

    # get dois from lists to compare for differences
    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)

    # retrieve which publications are already known, removed, inserted
    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    # deletes publications and edges from node list if publications can no longer be reached
    if (len(deleted_nodes) > 0):
        processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, deleted_nodes, old_doi_input_list, old_edges_list)

    return(processed_input_list_del, valid_edges_del)
def delete_ref_nodes_rec(pub):
    '''
    :param pub: publication to get deleted after the recursive call
    :type pub: Publication

    function that removes publications from the global processed_list, if they
    are no longer reachable from input nodes via references
    '''
    for reference in pub.references:
        # BUG FIX: iterate over a snapshot — the recursive calls remove entries
        # from processed_list while this loop is running, which skips elements
        for ref_pub in list(processed_list):
            # membership re-check: ref_pub may have been removed by an earlier recursion
            if (ref_pub.doi_url == reference and ref_pub in processed_list):

                # guard against a direct two-node cycle to avoid a recursion error
                # NOTE(review): cycles longer than two nodes are not caught here —
                # TODO confirm such cycles cannot occur in the input data
                if (reference not in pub.citations):
                    delete_ref_nodes_rec(ref_pub)

    # removes publication from list after recursion and if it's not of group input;
    # BUG FIX: the membership check prevents a ValueError when pub was already
    # removed via another path through the graph
    if (pub.group != "input" and pub in processed_list):
        processed_list.remove(pub)
def delete_cit_nodes_rec(pub):
    '''
    :param pub: publication to be removed after the recursive call
    :type pub: Publication

    function that removes publications from the global processed_list, if they
    are no longer reachable from input nodes via citations
    '''
    for citation in pub.citations:
        # BUG FIX: iterate over a snapshot — the recursive calls remove entries
        # from processed_list while this loop is running, which skips elements
        for cit_pub in list(processed_list):
            # membership re-check: cit_pub may have been removed by an earlier recursion
            if (cit_pub.doi_url == citation and cit_pub in processed_list):

                # guard against a direct two-node cycle to avoid a recursion error
                # NOTE(review): cycles longer than two nodes are not caught here —
                # TODO confirm such cycles cannot occur in the input data
                if (citation not in pub.references):
                    delete_cit_nodes_rec(cit_pub)

    # removes publication from list after recursion and if it's not of group input;
    # BUG FIX: the membership check prevents a ValueError when pub was already
    # removed via another path through the graph
    if (pub.group != "input" and pub in processed_list):
        processed_list.remove(pub)
def delete_nodes_and_edges(input_list, deleted_nodes, old_doi_input_list, old_edges_list):
    '''
    :param input_list: list of publications to get reduced
    :type input_list: List[Publication]

    :param deleted_nodes: list of input dois which are not in new call
    :type deleted_nodes: List[String]

    :param old_doi_input_list: list of input dois from old call
    :type old_doi_input_list: List[String]

    :param old_edges_list: list of links between publications from old call
    :type old_edges_list: List[List[String,String]]

    function to start recursive node removal for references and citations and to return edge list to valid state
    '''

    # global list to save the process of removing unneeded publications
    global processed_list
    processed_list = input_list.copy()

    for del_node in deleted_nodes:
        # BUG FIX: iterate over a snapshot — processed_list.remove(pub) below
        # mutates the list being iterated, which skips the element following
        # each removal (a second deleted input node could be missed)
        for pub in list(processed_list):
            if (del_node != pub.doi_url):
                continue

            # checks for every reference if it is cited by another input node;
            # if not, it calls the deletion function
            # NOTE(review): delete_ref_nodes_rec(pub) walks ALL references of
            # pub, not just this one — TODO confirm that is intended
            for reference in pub.references:
                only_reference = True
                for ref_cit in processed_list:
                    if (reference == ref_cit.doi_url):
                        for citation in ref_cit.citations:
                            if ((citation in old_doi_input_list) and (citation != del_node)):
                                only_reference = False
                                break
                    if (only_reference == False):
                        break
                if (only_reference):
                    delete_ref_nodes_rec(pub)

            # checks for every citation if it cites another input node;
            # if not, it calls the deletion function
            # (renamed from the misleading reuse of "only_reference")
            for citation in pub.citations:
                only_citation = True
                for cit_ref in processed_list:
                    if (citation == cit_ref.doi_url):
                        for reference in cit_ref.references:
                            if ((reference in old_doi_input_list) and (reference != del_node)):
                                only_citation = False
                                break
                    if (only_citation == False):
                        break
                if (only_citation):
                    delete_cit_nodes_rec(pub)

            # removes publication of group input after start of recursive call to both directions
            processed_list.remove(pub)

    # drop edges whose endpoints are no longer both present
    valid_edges = back_to_valid_edges(old_edges_list, processed_list)
    return(processed_list, valid_edges)