diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index 89f1ef2707d456c565adca8988d1ebe27b6501e3..ac85f522ac884d5e48fedef2f42f256bba610c79 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -25,9 +25,13 @@ from json_demo import output_to_json # doi_input_list: list of publication dois from user # TO-DO: Listenelemente auf Korrektheit überprüfen -def initialize_nodes_list(doi_input_list): +def initialize_nodes_list(doi_input_list, test_var): for pub_doi in doi_input_list: - pub = input_test_func(pub_doi) + if(test_var): + pub = input_test_func(pub_doi) + else: + pub = input(pub_doi) + not_in_nodes = True for node in nodes: # checks if a pub is already in nodes if (pub.doi_url == node.doi_url): @@ -35,6 +39,7 @@ def initialize_nodes_list(doi_input_list): break if (not_in_nodes): nodes.append(pub) + #print(pub.doi_url) pub.group = "input" else: doi_input_list.remove(pub_doi) @@ -55,11 +60,11 @@ def create_graph_structure_citations(pub, search_height, search_height_max): if (search_height <= search_height_max): citation.group = "height" nodes.append(citation) - edges.append([pub.doi_url,citation.doi_url]) + edges.append([citation.doi_url,pub.doi_url]) # adds only edge if citation already exists else: - edges.append([pub.doi_url,citation.doi_url]) + edges.append([citation.doi_url,pub.doi_url]) @@ -77,11 +82,11 @@ def create_graph_structure_references(pub, search_depth, search_depth_max): if (search_depth <= search_depth_max): reference.group = "depth" nodes.append(reference) - edges.append([reference.doi_url,pub.doi_url]) + edges.append([pub.doi_url,reference.doi_url]) # adds only edge if citation already exists else: - edges.append([reference.doi_url,pub.doi_url]) + edges.append([pub.doi_url,reference.doi_url]) @@ -89,13 +94,17 @@ def create_graph_structure_references(pub, search_depth, search_depth_max): # doi_citations: input list of citet dois # search_height: current search_height of height-first-search # search_height_max: maximal search_height for dfs -def process_citations_rec(doi_citations, search_height, search_height_max): +def process_citations_rec(doi_citations, search_height, search_height_max, test_var): # height of search is increased by 1 with each recursive call search_height += 1 # create class object for every citation from list for pub_doi in doi_citations: - pub = input_test_func(pub_doi) + if (test_var): + pub = input_test_func(pub_doi) + else: + pub = input(pub_doi) + create_graph_structure_citations(pub, search_height, search_height_max) # If the maximum height has not yet been reached, all references from the publication # are written to an array and the function is called again with this array. @@ -109,7 +118,7 @@ def process_citations_rec(doi_citations, search_height, search_height_max): citations_list.append(citation.doi_url) # recursive call of function. - process_citations_rec(citations_list, search_height, search_height_max) + process_citations_rec(citations_list, search_height, search_height_max, test_var) @@ -117,13 +126,17 @@ def process_citations_rec(doi_citations, search_height, search_height_max): # doi_references: input list of referenced dois # search_depth: current search_depth of height-first-search # search_depth_max: maximal search_depth for dfs -def process_references_rec(doi_references, search_depth, search_depth_max): +def process_references_rec(doi_references, search_depth, search_depth_max, test_var): # The depth is increased by 1 with each recursive call search_depth += 1 # create class object for every citation from list for pub_doi in doi_references: - pub = input_test_func(pub_doi) + if (test_var): + pub = input_test_func(pub_doi) + else: + pub = input(pub_doi) + create_graph_structure_references(pub, search_depth, search_depth_max) # If the maximum depth has not yet been reached, all references from the publication # are written to an array and the function is called again with this array. @@ -133,16 +146,17 @@ def process_references_rec(doi_references, search_depth, search_depth_max): # currently only the references with acs are stored in the URL, because we can't # extract the info from other sources. - if ("acs" in reference.doi_url): + if ("acs" in reference.doi_url or test_var == True): references_list.append(reference.doi_url) + # recursive call of function. - process_references_rec(references_list, search_depth, search_depth_max) + process_references_rec(references_list, search_depth, search_depth_max, test_var) -def process_main(doi_input_list, search_height, search_depth): +def process_main(doi_input_list, search_height, search_depth, test_var = False): # ERROR-Handling doi_array = NULL if (len(doi_input_list) == 0): print("Error, no input data") @@ -161,16 +175,33 @@ def process_main(doi_input_list, search_height, search_depth): nodes = [] edges = [] - initialize_nodes_list(doi_input_list) - process_citations_rec(doi_input_list, 0, search_height) - process_references_rec(doi_input_list, 0, search_depth) + initialize_nodes_list(doi_input_list,test_var) + process_citations_rec(doi_input_list, 0, search_height, test_var) + process_references_rec(doi_input_list, 0, search_depth, test_var) output_to_json(nodes,edges) # only for internal testing return(nodes,edges) +def print_graph(nodes, edges): + print("Knoten:\n") + for node in nodes: + print(node.title, "\n") + print("\n Kanten:\n") + for edge in edges: + print(edge,"\n") + + +# function to test cycles +def test_cycle(): + arr = [] + arr.append('doiz1') + #arr.append('doiz2') + nodes,edges = process_main(arr,1,1,True) + + print_graph(nodes, edges) # program test, because there is no connection to the input yet. def test_print(): @@ -185,19 +216,11 @@ def test_print(): #url = sys.argv[1] #arr.append[url] - arr.append('doi1') - #arr.append('doi2') - #arr.append('doi3') - - nodes,edges = process_main(arr,1,1) + nodes,edges = process_main(arr,2,2,True) - print("Knoten:\n") - for node in nodes: - print(node.title, "\n") - print("\n Kanten:\n") - for edge in edges: - print(edge,"\n") + print_graph(nodes, edges) -test_print() +#test_print() +test_cycle() \ No newline at end of file diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5948ff804073098a6d097ad17e81503eb7ec86b Binary files /dev/null and b/verarbeitung/__pycache__/input_test.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index 29c2c14b5229d9768c2a273f8e1adba6cfcfc63f..a4f37b5097c0c5a82e7d36cd45671fb178e749ca 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and b/verarbeitung/__pycache__/json_demo.cpython-39.pyc differ diff --git a/verarbeitung/input_test.py b/verarbeitung/input_test.py index 79b00b1ce11e63f73e11251352745a673b8face9..c55f85219a7df0c76d37e6a34211a7c14ea3f1d7 100644 --- a/verarbeitung/input_test.py +++ b/verarbeitung/input_test.py @@ -60,4 +60,7 @@ beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['doi2'], beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', [], ['doi1'], ''] beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['doi1'], [], ''] -list_of_arrays = [beispiel1, beispiel2, beispiel3] +zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['doiz2'], ['doiz2'], ''] +zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['doiz1'], ['doiz1'], ''] + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2] diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 683e49f3e3175e12dcbe10df1e48c51dd27a8b77..8f648bf42654aefc13dd47dcecee507306184420 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"name": "title1", "author": ["contributor1"], "year": "date1", "journal": "journal1", "doi": "doi1", "group": "input"}, {"name": "title3", "author": ["contributor3"], "year": "date3", "journal": "journal3", "doi": "doi3", "group": "height"}, {"name": "title2", "author": ["contributor2"], "year": "date2", "journal": "journal2", "doi": "doi2", "group": "depth"}], "links": [{"source": "doi1", "target": "doi3"}, {"source": "doi2", "target": "doi1"}]} \ No newline at end of file +{"nodes": [{"name": "titlez1", "author": ["contributorz1.1", "contributorz1.2"], "year": "datez1", "journal": "journalz1", "doi": "doiz1", "group": "input"}, {"name": "titlez2", "author": ["contributorz2.1", "contributorz2.2"], "year": "datez2", "journal": "journalz2", "doi": "doiz2", "group": "depth"}], "links": [{"source": "doiz1", "target": "doiz2"}]} \ No newline at end of file diff --git "a/verarbeitung/n\303\266tige Tests.txt" "b/verarbeitung/n\303\266tige Tests.txt" new file mode 100644 index 0000000000000000000000000000000000000000..95563280436fbf6b9b8702dffef6f32e213f5a16 --- /dev/null +++ "b/verarbeitung/n\303\266tige Tests.txt" @@ -0,0 +1,4 @@ +Zyklus +großer Zyklus +Innere Kanten vervollständigen +