diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index a8631449d1445a54f24ecf9e573738ded1be3c2f..e7737080f554edf25eaaf631f8c68debb1e27b55 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -24,10 +24,10 @@ from input.interface import InputInterface as Input from input_test import input_test_func from json_demo import output_to_json -# adds every publication from input list to graph structure -# doi_input_list: list of publication dois from user + def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): ''' +<<<<<<< HEAD :param doi_input_list: input list of doi from UI :type doi_input_list: list of strings @@ -41,19 +41,31 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t :type test_var: boolean ''' +======= + :param doi_input_list: list with dois from user + :type doi_input_list: list + :param search_depth_max: recursion depth limit + :type search_depth_max: Integer + :param search_height_max: recursion height limit + :type search_height_max: Integer + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + + # adds every publication from input list to graph structure + # doi_input_list: list of publication dois from user + ''' +>>>>>>> 860565e76ef4ea961db9ca1564c95ac12ff9bf46 references_pub_obj_list = [] citations_pub_obj_list = [] - for pub_doi in doi_input_list: - - #checks if its a test and chooses input function accordingly - if(test_var): - pub = input_test_func(pub_doi) - else: + for pub_doi in doi_input_list: #iterates over every incoming doi + if(test_var): #checks that it is a test and chooses test-input function + pub = input_test_func(pub_doi) #creates an object of class Publication + else: #checks that it isnt a test and chooses standart-input function #print(pub_doi) inter = Input() try: - pub = inter.get_publication(pub_doi) + pub = inter.get_publication(pub_doi) #creates an object of class Publication except ValueError: continue except IndexError: @@ -61,16 +73,16 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t # checks if publication already exists in nodes - not_in_nodes = True - for node in nodes: # checks if a pub is already in nodes - if (pub.doi_url == node.doi_url): - not_in_nodes = False + not_in_nodes = True #boolean value to check if a node already exists in the set of nodes + for node in nodes: #iterates over every node in the set of nodes + if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set + not_in_nodes = False #false --> node will not be created break - if (not_in_nodes): - nodes.append(pub) + if (not_in_nodes): #there is no node with this doi in the set + nodes.append(pub) #appends Publication Object pub.group = "input" else: - doi_input_list.remove(pub_doi) + doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list # inserts references as publication objects into list and # inserts first depth references into nodes/edges if maximum search depth > 0 @@ -85,9 +97,10 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t return(references_pub_obj_list, citations_pub_obj_list) -# adds edges between citation and reference group + def complete_inner_edges(test_var): ''' +<<<<<<< HEAD :param test_var: variable to differenciate between test and url call :type test_var: boolean ''' @@ -123,21 +136,54 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ :type test_var: boolean ''' +======= + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + + # adds edges between citation and reference group + ''' + for node in nodes: #iterates over all nodes in the set of nodes + if (node.group == "depth"): #checks if the node has group depth (=is a reference from a paper) + for citation in node.citations: #iterates over the papers that this paper is cited by + for cit in nodes: #iterates over all nodes in set of nodes + if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): #checks if there is already a related node that is in the set of nodes + edges.append([citation.doi_url, node.doi_url]) # creates an edge between them + if (node.group == "height"): #checks if the node has group height (=is a citation from a paper) + for reference in node.references: #iterates over the papers that this is paper references + for ref in nodes: #iterates over all nodes in set of nodes + if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): #checks if there is already a related node that is in the set of nodes + edges.append([node.doi_url,reference.doi_url]) #creates an edge between them + + + + +def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): + ''' + :param pub: Paper + :type pub: Onbject of class Publication + :param search_depth: current recursion step + :type search_depth: integer + :param search_depth_max: recursion limit + :type search_depth_max: integer + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + # adds a node for every publication unknown + # adds edges for references between publications + # returs a list of nodes + ''' +>>>>>>> 860565e76ef4ea961db9ca1564c95ac12ff9bf46 references_pub_obj_list = [] - for reference in pub.references: - not_in_nodes = True - for node in nodes: - # checks every reference for duplication - if (reference.doi_url == node.doi_url): - not_in_nodes = False + for reference in pub.references: #iterates over the references of the considered paper + not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes + for node in nodes: #iterates over all nodes in set of nodes + if (reference.doi_url == node.doi_url): #determines that the node already exists + not_in_nodes = False #boolean false --> node will not be created break - if (not_in_nodes): - if (search_depth < search_depth_max): - - #checks if its a test and chooses input function accordingly - if (test_var): - reference_pub_obj = input_test_func(reference.doi_url) - else: + if (not_in_nodes): #checks that there is no node with this doi + if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit + if (test_var): #determines that it is a test and chooses the test-input function + reference_pub_obj = input_test_func(reference.doi_url) #creates an Object of Publication Class + else: #determines that it isnt a test and chooses the standart function #reference_pub_obj = Input(reference.doi_url) inter = Input() try: @@ -149,9 +195,9 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ continue reference_pub_obj.group = "depth" - nodes.append(reference_pub_obj) - edges.append([pub.doi_url,reference_pub_obj.doi_url]) - references_pub_obj_list.append(reference_pub_obj) + nodes.append(reference_pub_obj) # appends the object to the set of nodes + edges.append([pub.doi_url,reference_pub_obj.doi_url]) #appends the edge to the set of edges + references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references # adds edge only if citation already exists elif [pub.doi_url,reference.doi_url] not in edges: @@ -159,6 +205,7 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ return references_pub_obj_list +<<<<<<< HEAD # recursive function to implement height-first-search on references # references_pub_obj_list: input list of references as publication objects # search_depth: current search_depth of height-first-search @@ -178,6 +225,22 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m :type test_var: boolean ''' +======= + +def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): + ''' + :param references_pub_obj_list: input list of references as publication objects + :type references_pub_obj_list: liste + :param search_depth: current search_depth of height-first-search + :type search_depth: integer + :param search_depth_max: maximal search_depth for dfs + :type search_depth_max: integer + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + + # recursive function to implement height-first-search on references + ''' +>>>>>>> 860565e76ef4ea961db9ca1564c95ac12ff9bf46 # adds next level to nodes/edges for pub in references_pub_obj_list: new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) @@ -189,10 +252,10 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m -# adds a node for every publication unknown -# adds edges for citations between publications + def create_graph_structure_citations(pub, search_height, search_height_max, test_var): ''' +<<<<<<< HEAD :param pub: publication which citations will be added :type pub: Class Publication @@ -206,18 +269,29 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test :type test_var: boolean ''' +======= + :param pub: Paper + :type pub: Onbject of class Publication + :param search_height: current recursion step + :type search_height: integer + :param search_height_max: recursion limit + :type search_height_max: integer + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + # adds a node for every publication unknown + # adds edges for citations between publications + # returns list of nodes + ''' +>>>>>>> 860565e76ef4ea961db9ca1564c95ac12ff9bf46 citations_pub_obj_list = [] for citation in pub.citations: not_in_nodes = True - for node in nodes: - # checks every citation for duplication + for node in nodes: # checks every citation for duplication if (citation.doi_url == node.doi_url): not_in_nodes = False break if (not_in_nodes): - if (search_height < search_height_max): - - #checks if its a test and chooses input function accordingly + if (search_height < search_height_max): #checks if its a test and chooses input function accordingly if (test_var): citation_pub_obj = input_test_func(citation.doi_url) else: @@ -242,6 +316,7 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test +<<<<<<< HEAD # recursive function to implement height-first-search on citations # citations_pub_obj_list: input list of citations as publication objects # search_height: current search_height of height-first-search @@ -261,6 +336,22 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m :type test_var: boolean ''' +======= + +def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): + ''' + :param references_pub_obj_list: input list of citations as publication objects + :type references_pub_obj_list: liste + :param search_height: current search_height of height-first-search + :type search_height: integer + :param search_height_max: maximal search_height for dfs + :type search_height_max: integer + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + + # recursive function to implement height-first-search on citations + ''' +>>>>>>> 860565e76ef4ea961db9ca1564c95ac12ff9bf46 # adds next level to nodes/edges for pub in citations_pub_obj_list: new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) @@ -272,13 +363,10 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m -# main function to call. Needs as input: -# doi_input_list: input list of dois -# search_height: max search height to process to -# search_depth: max search depth to process to -# test_var: only needed for unit test as True, default is False + def process_main(doi_input_list, search_height, search_depth, test_var = False): ''' +<<<<<<< HEAD :param doi_input_list: input list of doi from UI :type doi_input_list: list of strings @@ -292,6 +380,19 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): :type test_var: boolean ''' +======= + :param doi_input_list: list with dois from user + :type doi_input_list: list + :param search_height: recursion height + :type search_height: integer + :param search_depth: recursion depth + :type search_depth: integer + :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll + :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) + + # main function to call. Needs as input: + ''' +>>>>>>> 860565e76ef4ea961db9ca1564c95ac12ff9bf46 # ERROR-Handling doi_array = NULL if (len(doi_input_list) == 0): print("Error, no input data") @@ -304,11 +405,11 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): if (search_depth < 0): print("Error, search_depth of search must be positive") - # create empty array for the nodes - # create empty array for the edges + + global nodes, edges - nodes = [] - edges = [] + nodes = [] # create empty array for the nodes + edges = [] # create empty array for the edges # initializes nodes/edges from input and gets a list with publication objects for citations and references returned references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) @@ -331,4 +432,4 @@ def process_main(doi_input_list, search_height, search_depth, test_var = False): doi_nodes_list.append(node.doi_url) return(doi_nodes_list, edges) - return(nodes,edges) \ No newline at end of file + return(nodes,edges)