diff --git a/verarbeitung/construct_new_graph/Processing.py b/verarbeitung/construct_new_graph/Processing.py index 34e5e219e69e118332c294946eb15bd430204e07..0f3dc51928b2df1e83a2e30f7cab75db313bfcb9 100644 --- a/verarbeitung/construct_new_graph/Processing.py +++ b/verarbeitung/construct_new_graph/Processing.py @@ -86,14 +86,14 @@ def complete_inner_edges(): for node in nodes: if (node.group < 0): for citation in node.citations: - for cit in nodes: - if (citation == cit.doi_url and [citation, node.doi_url] not in edges): - edges.append([citation, node.doi_url]) + for pub in nodes: + if ((pub.doi_url == citation.doi_url) and ([citation.doi_url, node.doi_url] not in edges)): + edges.append([citation.doi_url, node.doi_url]) if (node.group > 0): for reference in node.references: - for ref in nodes: - if (reference == ref.doi_url and [node.doi_url, reference] not in edges): - edges.append([node.doi_url,reference]) + for pub in nodes: + if ((pub.doi_url == reference.doi_url) and ([node.doi_url, reference.doi_url] not in edges)): + edges.append([node.doi_url,reference.doi_url]) def process_main(doi_input_list, search_height, search_depth, test_var = False): @@ -142,6 +142,6 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): complete_inner_edges() # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes,edges, test_var) + output_to_json(nodes, edges, test_var) return(nodes,edges) diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index 4c2cac932574094a8e2f96ce793da99eb6ffd81a..2faab32324fb12a6754dfb0e40891fcfa72c6553 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -95,12 +95,12 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty for citation in get_cit_type_list(pub, cit_type): not_in_nodes = True for node in nodes: # checks every citation for duplication - if (citation == node.doi_url): + if (citation.doi_url == node.doi_url): not_in_nodes = False break if (not_in_nodes): if (search_depth < search_depth_max): #checks if its a test and chooses input function accordingly - citation_pub_obj = get_pub(citation, test_var) + citation_pub_obj = get_pub(citation.doi_url, test_var) if (type(citation_pub_obj) != Publication): print(pub) continue @@ -114,13 +114,14 @@ def create_graph_structure_citations(pub, search_depth, search_depth_max, cit_ty nodes.append(citation_pub_obj) citations_pub_obj_list.append(citation_pub_obj) - # adds only edge if citation already exists - elif (cit_type == "Citation"): - if ([citation,pub.doi_url] not in edges): - edges.append([citation,pub.doi_url]) - elif (cit_type == "Reference"): - if ([pub.doi_url,citation] not in edges): - edges.append([pub.doi_url,citation]) + # adds just the edge if citation already exists + else: + if (cit_type == "Citation"): + if ([citation.doi_url,pub.doi_url] not in edges): + edges.append([citation.doi_url,pub.doi_url]) + else: + if ([pub.doi_url,citation.doi_url] not in edges): + edges.append([pub.doi_url,citation.doi_url]) return citations_pub_obj_list diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py index e3fdedd2e669d2ea753fd72f1288ba519d4775bd..2766ba836e2ae807c0950ac60389a4f620c39ba5 100644 --- a/verarbeitung/get_pub_from_input.py +++ b/verarbeitung/get_pub_from_input.py @@ -33,15 +33,14 @@ def get_pub(pub_doi, test_var): function to return an object of type Publication for given input doi depending on whether its a test or url doi ''' #checks if it's a test and chooses appropiate function - # print(pub_doi) if(test_var): - pub = input_test_func(pub_doi) + pub = input_test_func(pub_doi) #checks that it isnt a test and chooses standart-input function else: inter = Input() try: - pub = inter.get_publication(pub_doi.doi_url) #creates an object of class Publication + pub = inter.get_publication(pub_doi) #creates an object of class Publication except AttributeError: pub = inter.get_publication(pub_doi) except ValueError: diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 36aad968eb0431395a389928ea3e9e4695198d85..183e7647e90c750badbd216f3ec62d9935c79e0c 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Input", "depth": 0, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Input", "depth": 0, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 12}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}]} \ No newline at end of file +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}]} \ No newline at end of file diff --git a/verarbeitung/test/Processing_unittest.py b/verarbeitung/test/Processing_unittest.py index a595b44eee0b67e8b767c4f76576d17c8b20d61f..47dd268885d14a254ced584c24d5a9e4dcafb0e8 100644 --- a/verarbeitung/test/Processing_unittest.py +++ b/verarbeitung/test/Processing_unittest.py @@ -12,16 +12,16 @@ class ProcessingTest(unittest.TestCase): maxDiff = None - # def testCycle(self): - # nodes, edges = process_main(['doiz1'],1,1,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) - # self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) + def testCycle(self): + nodes, edges = process_main(['doiz1'],1,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) - # nodes, edges = process_main(['doiz1'],2,2,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) - # self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) + nodes, edges = process_main(['doiz1'],2,2,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) #def testBigCycle(self): @@ -29,66 +29,66 @@ class ProcessingTest(unittest.TestCase): #def testEmptyDepth(self): - # def testEmptyDepthHeight(self): - # nodes, edges = process_main(['doi1'],0,0,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi1']) - # self.assertCountEqual(edges, []) + def testEmptyDepthHeight(self): + nodes, edges = process_main(['doi1'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi1']) + self.assertCountEqual(edges, []) - # nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes, ['doi1','doi2']) - # self.assertCountEqual(edges, [['doi1', 'doi2']]) + nodes, edges = process_main(['doi1', 'doi2'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2']) + self.assertCountEqual(edges, [['doi1', 'doi2']]) - # nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) - # self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) + nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) + self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - # def testInnerEdges(self): - # nodes, edges = process_main(['doi_ie1'],1,1,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) - # self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) + def testInnerEdges(self): + nodes, edges = process_main(['doi_ie1'],1,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) + self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) - # def testRightHeight(self): - # nodes, edges = process_main(['doi_h01'],1,0,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_h01']) - # self.assertCountEqual(edges, []) - - # nodes, edges = process_main(['doi_h02'],1,0,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) - # self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - - # nodes, edges = process_main(['doi_h02'],2,0,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) - # self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) - - # def testRightDepth(self): - # nodes, edges = process_main(['doi_d01'],0,1,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_d01']) - # self.assertCountEqual(edges, []) - - # nodes, edges = process_main(['doi_d02'],0,1,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) - # self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - - # nodes, edges = process_main(['doi_d02'],0,2,True) - # doi_nodes = keep_only_dois(nodes) - # self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) - # self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) - - # def test_import_from_json(self): - # nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True) - # nodes_new, edges_new = input_from_json('test_output.json') - # self.assertCountEqual(nodes_old,nodes_new) - # self.assertCountEqual(edges_old, edges_new) + def testRightHeight(self): + nodes, edges = process_main(['doi_h01'],1,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h01']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi_h02'],1,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) + self.assertCountEqual(edges, [['doi_h1','doi_h02']]) + + nodes, edges = process_main(['doi_h02'],2,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) + self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) + + def testRightDepth(self): + nodes, edges = process_main(['doi_d01'],0,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d01']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi_d02'],0,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) + self.assertCountEqual(edges, [['doi_d02','doi_d1']]) + + nodes, edges = process_main(['doi_d02'],0,2,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) + self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) + + def test_import_from_json(self): + nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True) + nodes_new, edges_new = input_from_json('test_output.json') + self.assertCountEqual(nodes_old,nodes_new) + self.assertCountEqual(edges_old, edges_new) def test_deleted_input_dois(self): nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True) diff --git a/verarbeitung/test/input_test.py b/verarbeitung/test/input_test.py index 928c8b401fc2377b05cb5fe3aa6500ea0c0c0261..9efb376dd843c812984ea10651dbb221f327e471 100644 --- a/verarbeitung/test/input_test.py +++ b/verarbeitung/test/input_test.py @@ -14,11 +14,13 @@ def input_test_func(pub_doi): for array in list_of_arrays: if pub_doi == array[0]: - pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) + pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], [], []) + pub.citations = cit(array[7], "Citation") + pub.references = cit(array[6], "Reference") return pub -def cit(list_doi): +def cit(list_doi, cit_type): ''' :param list_doi list of citation dois to get their Citation Class :type list_doi: List[String] @@ -30,24 +32,9 @@ def cit(list_doi): for doi_url in list_doi: for array in list_of_arrays: if doi_url == array[0]: - cits.append(Citation(array[0], array[1], array[2], array[3], array[4])) + cits.append(Citation(array[0], array[1], array[2], array[3], cit_type)) return cits -def ref(list_doi): - ''' - :param list_doi list of reference dois to get their Reference Class - :type list_doi: List[String] - - returns a list of reference objects for given doi list - ''' - - refs = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - refs.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return refs - beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] @@ -91,12 +78,13 @@ large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i','doi_cg_i'], ['doi_lg_2_h21','doi_lg_2_h22']] large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] -large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12','doi_cg_i']] -large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']] -large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']] -large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']] +large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12','doi_cg_i','doi_lg_2_h11']] +large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], ['doi_lg_2_i','doi_lg_2_d21'], ['doi_lg_2_i']] +large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d22','doi_lg_2_d23','doi_lg_2_d24'], ['doi_lg_2_h24','doi_lg_2_i']] +large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d11']] large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] +large_graph_2_d24 = ['doi_lg_2_d24', 'title_lg_2_d24', ['contributor_lg_2_d24'], 'journal_lg_2_d24', 'date_lg_2_d24', ['subject_lg_2_d24'], [], ['doi_lg_2_d12']] crossed_graph_h21 = ['doi_cg_h21', 'title_cg_h21', ['contributor_cg_h21'], 'journal_cg_h21', 'date_cg_h21', ['subject_cg_h21'], ['doi_cg_h11'], []] crossed_graph_h22 = ['doi_cg_h22', 'title_cg_h22', ['contributor_cg_h22'], 'journal_cg_h22', 'date_cg_h22', ['subject_cg_h22'], ['doi_cg_h11'], []] diff --git a/verarbeitung/test/test_graphs_plan.pdf b/verarbeitung/test/test_graphs_plan.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c45b187bf3665e98fc84a4267bad2cbb9e3e64fa Binary files /dev/null and b/verarbeitung/test/test_graphs_plan.pdf differ diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json index e5a90e6f507ebe0f55160a0a981f808464b941e6..40a7bc557e428357a6d173f86c5aea4c19b457b3 100644 --- a/verarbeitung/test_output.json +++ b/verarbeitung/test_output.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_i", "name": "title_lg_2_i", "author": ["contributor_lg_2_i"], "year": "date_lg_2_i", "journal": "journal_lg_2_i", "group": "Input", "depth": 0, "citations": 3}, {"doi": "doi_lg_2_d11", "name": "title_lg_2_d11", "author": ["contributor_lg_2_d11"], "year": "date_lg_2_d11", "journal": "journal_lg_2_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_2_d12", "name": "title_lg_2_d12", "author": ["contributor_lg_2_d12"], "year": "date_lg_2_d12", "journal": "journal_lg_2_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_2_h11", "name": "title_lg_2_h11", "author": ["contributor_lg_2_h11"], "year": "date_lg_2_h11", "journal": "journal_lg_2_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_2_h12", "name": "title_lg_2_h12", "author": ["contributor_lg_2_h12"], "year": "date_lg_2_h12", "journal": "journal_lg_2_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_cg_i", "name": "title_cg_i", "author": ["contributor_cg_i"], "year": "date_cg_i", "journal": "journal_cg_i", "group": "Citedby", "depth": 1, "citations": 3}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h21", "name": "title_lg_2_h21", "author": ["contributor_lg_2_h21"], "year": "date_lg_2_h21", "journal": "journal_lg_2_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h22", "name": "title_lg_2_h22", "author": ["contributor_lg_2_h22"], "year": "date_lg_2_h22", "journal": "journal_lg_2_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_2_h23", "name": "title_lg_2_h23", "author": ["contributor_lg_2_h23"], "year": "date_lg_2_h23", "journal": "journal_lg_2_h23", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_lg_2_h24", "name": "title_lg_2_h24", "author": ["contributor_lg_2_h24"], "year": "date_lg_2_h24", "journal": "journal_lg_2_h24", "group": "Citedby", "depth": 2, "citations": 1}, {"doi": "doi_cg_h11", "name": "title_cg_h11", "author": ["contributor_cg_h11"], "year": "date_cg_h11", "journal": "journal_cg_h11", "group": "Citedby", "depth": 2, "citations": 2}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_2_d21", "name": "title_lg_2_d21", "author": ["contributor_lg_2_d21"], "year": "date_lg_2_d21", "journal": "journal_lg_2_d21", "group": "Reference", "depth": -2, "citations": 1}, {"doi": "doi_lg_2_d22", "name": "title_lg_2_d22", "author": ["contributor_lg_2_d22"], "year": "date_lg_2_d22", "journal": "journal_lg_2_d22", "group": "Reference", "depth": -2, "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d11"}, {"source": "doi_lg_2_i", "target": "doi_lg_2_d12"}, {"source": "doi_lg_2_h11", "target": "doi_lg_2_i"}, {"source": "doi_lg_2_h12", "target": "doi_lg_2_i"}, {"source": "doi_cg_i", "target": "doi_lg_2_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h21", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h22", "target": "doi_lg_2_h11"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_h23"}, {"source": "doi_lg_2_h23", "target": "doi_lg_2_h24"}, {"source": "doi_lg_1_h23", "target": "doi_cg_i"}, {"source": "doi_cg_h11", "target": "doi_cg_i"}, {"source": "doi_lg_2_h11", "target": "doi_cg_i"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d21"}, {"source": "doi_lg_2_d12", "target": "doi_lg_2_d22"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}, {"source": "doi_lg_2_h24", "target": "doi_lg_2_d12"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph/update_graph_del.py b/verarbeitung/update_graph/update_graph_del.py index c371e0cf2f275b7f18110013d48390d00420b7b1..eaa25d69002a7bb746ebf7b02d6342f9252b87af 100644 --- a/verarbeitung/update_graph/update_graph_del.py +++ b/verarbeitung/update_graph/update_graph_del.py @@ -29,10 +29,15 @@ def delete_ref_nodes_rec(pub): ''' for reference in pub.references: for ref_pub in processed_list: - if (ref_pub.doi_url == reference): + if (ref_pub.doi_url == reference.doi_url): # to find a cyclus and avoid recursion error - if (reference not in pub.citations): + not_in_citations = True + for citation in pub.citations: + if (reference.doi_url == citation.doi_url): + not_in_citations = False + break + if (not_in_citations): delete_ref_nodes_rec(ref_pub) # removes publication from list after recursion and if it's not of group input @@ -49,10 +54,15 @@ def delete_cit_nodes_rec(pub): ''' for citation in pub.citations: for cit_pub in processed_list: - if (cit_pub.doi_url == citation): + if (cit_pub.doi_url == citation.doi_url): # to find a cyclus and avoid recursion error - if (citation not in pub.references): + not_in_references = True + for reference in pub.references: + if (citation.doi_url == reference.doi_url): + not_in_references = False + break + if (not_in_references): delete_cit_nodes_rec(cit_pub) # removes publication from list after recursion and if it's not of group input