def complete_inner_edges(test_var):
    """Add edges between nodes that are already part of the graph.

    Runs over the module-level ``nodes`` list and, for every node whose
    ``group`` is ``"depth"`` or ``"height"``, re-fetches the publication and
    appends any missing ``[source_doi, target_doi]`` pair to the module-level
    ``edges`` list, provided the other end of the edge is a known node.

    Args:
        test_var: When truthy, publications are fetched with
            ``input_test_func``; otherwise with the project's ``input``
            function.

    Returns:
        None. ``edges`` is extended in place.
    """
    # Compare by DOI, exactly and in both branches. The previous version used
    # object membership (`citation in nodes`) for citations and a substring
    # test (`reference.doi_url in node.doi_url`) for references, so e.g.
    # "doi1" matched a node "doi10" and produced a bogus edge.
    known_dois = {known.doi_url for known in nodes}

    # Mirror of `edges` for O(1) duplicate checks; `edges` stays a list of
    # two-element lists because that is the shape the rest of the module uses.
    seen_edges = {tuple(edge) for edge in edges}

    def _add_edge(source_doi, target_doi):
        # Append the edge only once, keeping the lookup set in sync.
        key = (source_doi, target_doi)
        if key not in seen_edges:
            seen_edges.add(key)
            edges.append([source_doi, target_doi])

    for node in nodes:
        # Only these two groups contribute edges; skip the fetch otherwise.
        if node.group not in ("depth", "height"):
            continue

        if test_var:
            pub = input_test_func(node.doi_url)
        else:
            pub = input(node.doi_url)

        if node.group == "depth":
            # A citing publication points at the fetched publication.
            for citation in pub.citations:
                if citation.doi_url in known_dois:
                    _add_edge(citation.doi_url, pub.doi_url)
        else:
            # group == "height": the fetched publication points at its
            # references.
            for reference in pub.references:
                if reference.doi_url in known_dois:
                    _add_edge(pub.doi_url, reference.doi_url)
testCycle(self): - nodes, edges = process_main(['doiz1'],1,1,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) + nodes, edges = process_main(['doiz1'],1,1,True) + self.assertCountEqual(nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) - nodes, edges = process_main(['doiz1'],2,2,True) - self.assertCountEqual(nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) + nodes, edges = process_main(['doiz1'],2,2,True) + self.assertCountEqual(nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) #def testBigCycle(self): @@ -18,20 +18,23 @@ class ProcessingTest(unittest.TestCase): #def testEmptyDepth(self): def testEmptyDepthHeight(self): - nodes, edges = process_main(['doi1'],0,0,True) - self.assertCountEqual(nodes,['doi1']) - self.assertCountEqual(edges, []) + nodes, edges = process_main(['doi1'],0,0,True) + self.assertCountEqual(nodes,['doi1']) + self.assertCountEqual(edges, []) - nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2']) - self.assertCountEqual(edges, [['doi1', 'doi2']]) + nodes, edges = process_main(['doi1', 'doi2'],0,0,True) + self.assertCountEqual(nodes, ['doi1','doi2']) + self.assertCountEqual(edges, [['doi1', 'doi2']]) - nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - self.assertCountEqual(nodes, ['doi1','doi2', 'doi3']) - self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) + nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) + self.assertCountEqual(nodes, ['doi1','doi2', 'doi3']) + self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - #def testInnerEdges(self): + def testInnerEdges(self): + nodes, edges = process_main(['doi_ie1'],1,1,True) + self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3']) + 
self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-38.pyc b/verarbeitung/__pycache__/Processing.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63ac529316c848e829cd83ef44ec749e5903bf9e Binary files /dev/null and b/verarbeitung/__pycache__/Processing.cpython-38.pyc differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc index ae6b213cdfdd6a2c2fef9b9f68e2d1d431413958..515ab99c01a5ce78bb5bb6de554a4dae3ffe4b4b 100644 Binary files a/verarbeitung/__pycache__/input_fj.cpython-38.pyc and b/verarbeitung/__pycache__/input_fj.cpython-38.pyc differ diff --git a/verarbeitung/__pycache__/input_test.cpython-38.pyc b/verarbeitung/__pycache__/input_test.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df395212453392e135532b12396cd4c30a92ea05 Binary files /dev/null and b/verarbeitung/__pycache__/input_test.cpython-38.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc index e61f915490a7d068eb80a1c95d95752082231775..4a1e7ba987775a20fddaa4a8f846bb238670d6a1 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-38.pyc and b/verarbeitung/__pycache__/json_demo.cpython-38.pyc differ diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py index c55f85219a7df0c76d37e6a34211a7c14ea3f1d7..49621e6c86bbd0919b7bd393e7bd0be09079660f 100644 --- a/verarbeitung/input_test.py +++ b/verarbeitung/input_test.py @@ -63,4 +63,8 @@ beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], ''] zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 
'journalz2', 'datez2', ['doiz1'], ['doiz1'], ''] -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2] +inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['doi_ie2'], ['doi_ie3'], ''] +inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], ''] +inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], ''] + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3] diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 5b81672e2236baf25949729c3c32b56f7d08edc8..32c49d8c3dcdc4ffada291b8a7b3a4aaba200ea0 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"name": "title1", "author": ["contributor1"], "year": "date1", "journal": "journal1", "doi": "doi1", "group": "input"}, {"name": "title2", "author": ["contributor2"], "year": "date2", "journal": "journal2", "doi": "doi2", "group": "input"}, {"name": "title3", "author": ["contributor3"], "year": "date3", "journal": "journal3", "doi": "doi3", "group": "input"}], "links": [{"source": "doi3", "target": "doi1"}, {"source": "doi1", "target": "doi2"}]} \ No newline at end of file +{"nodes": [{"name": "title_ie1", "author": ["contributor_ie1.1", "contributor_ie1.2"], "year": "date_ie1", "journal": "journal_ie1", "doi": "doi_ie1", "group": "input"}, {"name": "titlez_ie3", "author": ["contributor_ie3.1", "contributor_ie3.2"], "year": "date_ie3", "journal": "journal_ie3", "doi": "doi_ie3", "group": "height"}, {"name": "title_ie2", "author": ["contributor_ie2.1", "contributor_ie2.2"], "year": "date_ie2", "journal": "journal_ie2", "doi": "doi_ie2", "group": "depth"}], "links": [{"source": "doi_ie3", "target": "doi_ie1"}, {"source": "doi_ie1", "target": "doi_ie2"}, {"source": 
"doi_ie3", "target": "doi_ie2"}]} \ No newline at end of file