diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index eccdca03b3155eca576cfec65a03a6d14e78d550..ab7db1d5c46193c14637f10e28fd163dee81b10e 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -27,11 +27,14 @@ from json_demo import output_to_json # TO-DO: Listenelemente auf Korrektheit überprüfen def initialize_nodes_list(doi_input_list, test_var): for pub_doi in doi_input_list: + + # checks if its a test and chooses input function accordingly if(test_var): pub = input_test_func(pub_doi) else: pub = input(pub_doi) + # checks if publication already exists in nodes not_in_nodes = True for node in nodes: # checks if a pub is already in nodes if (pub.doi_url == node.doi_url): @@ -43,16 +46,22 @@ def initialize_nodes_list(doi_input_list, test_var): else: doi_input_list.remove(pub_doi) +# adds inner edges between citations and references to edges def complete_inner_edges(test_var): for node in nodes: + + # checks if its a test and chooses input function accordingly if (test_var): pub = input_test_func(node.doi_url) else: pub = input(node.doi_url) + + if (node.group == "depth"): for citation in pub.citations: if (citation in nodes and [citation.doi_url, pub.doi_url] not in edges): edges.append([citation.doi_url, pub.doi_url]) + if (node.group == "height"): for reference in pub.references: for node in nodes: @@ -65,6 +74,8 @@ def complete_inner_edges(test_var): # adds edges for citations between publications def create_graph_structure_citations(pub, search_height, search_height_max): for citation in pub.citations: + + # checks if publication already exists in nodes not_in_nodes = True for node in nodes: # checks every citation for duplication @@ -77,7 +88,7 @@ def create_graph_structure_citations(pub, search_height, search_height_max): nodes.append(citation) edges.append([citation.doi_url,pub.doi_url]) - # adds only edge if citation already exists + # adds only an edge (citation already exists) elif [citation.doi_url,pub.doi_url] not in edges: edges.append([citation.doi_url,pub.doi_url]) @@ -87,6 +98,8 @@ def create_graph_structure_citations(pub, search_height, search_height_max): # adds edges for references between publications def create_graph_structure_references(pub, search_depth, search_depth_max): for reference in pub.references: + + # checks if publication already exists in nodes not_in_nodes = True for node in nodes: # checks every reference for duplication @@ -99,7 +112,7 @@ def create_graph_structure_references(pub, search_depth, search_depth_max): nodes.append(reference) edges.append([pub.doi_url,reference.doi_url]) - # adds only edge if citation already exists + # adds only an edge (citation already exists) elif [pub.doi_url,reference.doi_url] not in edges: edges.append([pub.doi_url,reference.doi_url]) @@ -115,6 +128,8 @@ def process_citations_rec(doi_citations, search_height, search_height_max, test_ # create class object for every citation from list for pub_doi in doi_citations: + + # checks if its a test and chooses input function accordingly if (test_var): pub = input_test_func(pub_doi) else: @@ -129,7 +144,7 @@ def process_citations_rec(doi_citations, search_height, search_height_max, test_ # currently only the references with acs are stored in the URL, because we can't # extract the info from other sources. - if ("acs" in citation.doi_url): + if ("acs" in citation.doi_url or test_var == True): citations_list.append(citation.doi_url) # recursive call of function. @@ -147,6 +162,8 @@ def process_references_rec(doi_references, search_depth, search_depth_max, test_ # create class object for every citation from list for pub_doi in doi_references: + + #checks if its a test and chooses input function accordingly if (test_var): pub = input_test_func(pub_doi) else: @@ -164,7 +181,6 @@ def process_references_rec(doi_references, search_depth, search_depth_max, test_ if ("acs" in reference.doi_url or test_var == True): references_list.append(reference.doi_url) - # recursive call of function. process_references_rec(references_list, search_depth, search_depth_max, test_var) @@ -190,6 +206,7 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): nodes = [] edges = [] + initialize_nodes_list(doi_input_list,test_var) process_citations_rec(doi_input_list, 0, search_height, test_var) process_references_rec(doi_input_list, 0, search_depth, test_var) diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/Processing_unittest.py index 6d83de109124f16922d1c5276c9d91b41e577d27..772d57204ce3374211d1d1fd3d08d279f085aac3 100644 --- a/verarbeitung/Processing_unittest.py +++ b/verarbeitung/Processing_unittest.py @@ -2,7 +2,7 @@ import unittest from Processing import process_main class ProcessingTest(unittest.TestCase): - def testCycle(self): + def testCycle(self): nodes, edges = process_main(['doiz1'],1,1,True) self.assertCountEqual(nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) @@ -17,7 +17,7 @@ class ProcessingTest(unittest.TestCase): #def testEmptyDepth(self): - def testEmptyDepthHeight(self): + def testEmptyDepthHeight(self): nodes, edges = process_main(['doi1'],0,0,True) self.assertCountEqual(nodes,['doi1']) self.assertCountEqual(edges, []) @@ -31,10 +31,36 @@ class ProcessingTest(unittest.TestCase): self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - def testInnerEdges(self): + def testInnerEdges(self): nodes, edges = process_main(['doi_ie1'],1,1,True) self.assertCountEqual(nodes,['doi_ie1','doi_ie2','doi_ie3']) self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) + + def testRightHeight(self): + nodes, edges = process_main(['doi_h01'],1,0,True) + self.assertCountEqual(nodes,['doi_h01']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi_h02'],1,0,True) + self.assertCountEqual(nodes,['doi_h02','doi_h1']) + self.assertCountEqual(edges, [['doi_h1','doi_h02']]) + + nodes, edges = process_main(['doi_h02'],2,0,True) + self.assertCountEqual(nodes,['doi_h02','doi_h1','doi_h2']) + self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) + + def testRightDepth(self): + nodes, edges = process_main(['doi_d01'],0,1,True) + self.assertCountEqual(nodes,['doi_d01']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi_d02'],0,1,True) + self.assertCountEqual(nodes,['doi_d02','doi_d1']) + self.assertCountEqual(edges, [['doi_d02','doi_d1']]) + + nodes, edges = process_main(['doi_d02'],0,2,True) + self.assertCountEqual(nodes,['doi_d02','doi_d1','doi_d2']) + self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e08ca682bcdfcff17580d1e2c0923b6aac9ce00d Binary files /dev/null and b/verarbeitung/__pycache__/Processing.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc index d5948ff804073098a6d097ad17e81503eb7ec86b..68e42fd6a47a02787524c68816a42574834931d2 100644 Binary files a/verarbeitung/__pycache__/input_test.cpython-39.pyc and b/verarbeitung/__pycache__/input_test.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index a4f37b5097c0c5a82e7d36cd45671fb178e749ca..4e31ce337645d5282ddab11668bc6d745735f9f8 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and b/verarbeitung/__pycache__/json_demo.cpython-39.pyc differ diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py index 49621e6c86bbd0919b7bd393e7bd0be09079660f..44361c4b095f1c4fb0fce1868498d0e9da32f551 100644 --- a/verarbeitung/input_test.py +++ b/verarbeitung/input_test.py @@ -67,4 +67,16 @@ inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2' inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', [], ['doi_ie1','doi_ie3'], ''] inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['doi_ie1','doi_ie2'], [], ''] -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3] +right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', [], [], ''] +right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', [], ['doi_h1'], ''] +right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', [], ['doi_h2'], ''] +right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', [], ['doi_h3'], ''] +right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', [], [], ''] + +right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', [], [], ''] +right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['doi_d1'], [], ''] +right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['doi_d2'], [], ''] +right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['doi_d3'], [], ''] +right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', [], [], ''] + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3] diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 32c49d8c3dcdc4ffada291b8a7b3a4aaba200ea0..23d7d2ddd9237bffb2319557c1df1c280ccc88be 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"name": "title_ie1", "author": ["contributor_ie1.1", "contributor_ie1.2"], "year": "date_ie1", "journal": "journal_ie1", "doi": "doi_ie1", "group": "input"}, {"name": "titlez_ie3", "author": ["contributor_ie3.1", "contributor_ie3.2"], "year": "date_ie3", "journal": "journal_ie3", "doi": "doi_ie3", "group": "height"}, {"name": "title_ie2", "author": ["contributor_ie2.1", "contributor_ie2.2"], "year": "date_ie2", "journal": "journal_ie2", "doi": "doi_ie2", "group": "depth"}], "links": [{"source": "doi_ie3", "target": "doi_ie1"}, {"source": "doi_ie1", "target": "doi_ie2"}, {"source": "doi_ie3", "target": "doi_ie2"}]} \ No newline at end of file +{"nodes": [{"name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "doi": "doi_h02", "group": "input"}, {"name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "doi": "doi_h1", "group": "height"}, {"name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "doi": "doi_h2", "group": "height"}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]} \ No newline at end of file