diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py deleted file mode 100644 index 4beb8e85ff1e6f1df02bd557aa574eb0f6330057..0000000000000000000000000000000000000000 --- a/verarbeitung/Processing.py +++ /dev/null @@ -1,290 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Functions to generate a graph representing citations between multiple ACS/Nature journals - -""" - -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" -__email__ = "cis-project2021@zbh.uni-hamburg.de" -__status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" - - -import sys -from pathlib import Path -from os import error -sys.path.append("../") - -from input.publication import Publication -from get_pub_from_input import get_pub -from json_demo import output_to_json - - -def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): - ''' - :param doi_input_list: input list of doi from UI - :type doi_input_list: List[String] - - :param search_depth_max: maximum depth to search for references - :type search_depth_max: int - - :param search_height_max: maximum height to search for citations - :type search_height_max: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - adds input dois to nodes and retrieves citations and references for input publications - ''' - - # saves found citations and references in lists - references_pub_obj_list = [] - citations_pub_obj_list = [] - - for pub_doi in doi_input_list: #iterates over every incoming doi - pub = get_pub(pub_doi, test_var) - if (type(pub) != Publication): - print(pub) - continue - - # checks if publication already exists in nodes - not_in_nodes = True #boolean value to check if a node already exists in the set of nodes - for node in nodes: #iterates over every node in the set of nodes - if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set - not_in_nodes = False #false --> node will not be created - break - if (not_in_nodes): #there is no node with this doi in the set - nodes.append(pub) #appends Publication Object - pub.group = "input" - else: - doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list - - # inserts references as publication objects into list and - # inserts first depth references into nodes/edges if maximum search depth > 0 - for reference in create_graph_structure_references(pub, 0, search_depth_max, test_var): - references_pub_obj_list.append(reference) - - # inserts citations as publication objects into list and - # inserts first height citations into nodes if maximum search height > 0 - for citation in create_graph_structure_citations(pub, 0, search_height_max, test_var): - citations_pub_obj_list.append(citation) - - return(references_pub_obj_list, citations_pub_obj_list) - - - -def complete_inner_edges(test_var): - ''' - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - completes inner edges between nodes of group height and depth - ''' - - for node in nodes: - if (node.group == "depth"): - for citation in node.citations: - for cit in nodes: - if (citation == cit.doi_url and [citation, node.doi_url] not in edges): - edges.append([citation, node.doi_url]) - if (node.group == "height"): - for reference in node.references: - for ref in nodes: - if (reference == ref.doi_url and [node.doi_url, reference] not in edges): - edges.append([node.doi_url,reference]) - - - -def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): - ''' - :param pub: publication which references will be added - :type pub: Publication - - :param search_depth: current depth to search for references - :type search_depth: int - - :param search_depth_max: maximum depth to search for references - :type search_depth_max: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - adds a node for every referenced publication unknown - adds edges to added references - ''' - - references_pub_obj_list = [] - for reference in pub.references: #iterates over the references of the considered paper - not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes - for node in nodes: #iterates over all nodes in set of nodes # - if (reference == node.doi_url): #determines that the node already exists - not_in_nodes = False #boolean false --> node will not be created - break - if (not_in_nodes): #checks that there is no node with this doi - if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit - reference_pub_obj = get_pub(reference, test_var) - if (type(reference_pub_obj) != Publication): - print(pub) - continue - - reference_pub_obj.group = "depth" - nodes.append(reference_pub_obj) # appends the object to the set of nodes - edges.append([pub.doi_url,reference_pub_obj.doi_url]) #appends the edge to the set of edges - references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references - - # adds edge only if citation already exists - elif [pub.doi_url,reference] not in edges: - edges.append([pub.doi_url,reference]) - return references_pub_obj_list - - -def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): - ''' - :param references_pub_obj_list: list of publications which references will be added - :type references_pub_obj_list: List[Publication] - - :param search_depth: current depth to search for references - :type search_depth: int - - :param search_depth_max: maximum depth to search for references - :type search_depth_max: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - recursive function to implement height-first-search on references - ''' - - # adds next level to nodes/edges - for pub in references_pub_obj_list: - new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) - - # If the maximum height has not yet been reached, calls function recursivly with increased height - if (search_depth < search_depth_max): - process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var) - - - -def create_graph_structure_citations(pub, search_height, search_height_max, test_var): - ''' - :param pub: publication which citations will be added - :type pub: Publication - - :param search_height: current height to search for citations - :type search_height_max: int - - :param search_height_max: maximum height to search for citations - :type search_height_max: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - adds a node for every citing publication unknown - adds edges to added citations - ''' - - citations_pub_obj_list = [] - for citation in pub.citations: - not_in_nodes = True - for node in nodes: # checks every citation for duplication - if (citation == node.doi_url): - not_in_nodes = False - break - if (not_in_nodes): - if (search_height < search_height_max): #checks if its a test and chooses input function accordingly - citation_pub_obj = get_pub(citation, test_var) - if (type(citation_pub_obj) != Publication): - print(pub) - continue - - citation_pub_obj.group = "height" - nodes.append(citation_pub_obj) - edges.append([citation_pub_obj.doi_url,pub.doi_url]) - citations_pub_obj_list.append(citation_pub_obj) - - # adds only edge if citation already exists - elif [citation,pub.doi_url] not in edges: - edges.append([citation,pub.doi_url]) - return citations_pub_obj_list - - -def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): - ''' - :param citations_pub_obj_list: list of publications which citations will be added - :type citations_pub_obj_list: List[Publication] - - :param search_height: current height to search for citations - :type search_height_max: int - - :param search_height_max: maximum height to search for citations - :type search_height_max: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - recursive function to implement depth-first-search on citations - ''' - - # adds next level to nodes/edges - for pub in citations_pub_obj_list: - new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) - - # If the maximum height has not yet been reached, calls function recursivly with increased height - if (search_height < search_height_max): - process_citations_rec(new_citation_pub_obj_list, search_height+1, search_height_max, test_var) - - - -def process_main(doi_input_list, search_height, search_depth, test_var = False): - ''' - :param doi_input_list: input list of doi from UI - :type doi_input_list: list of strings - - :param search_height: maximum height to search for citations - :type search_height: int - - :param search_depth: maximum depth to search for references - :type search_depth: int - - :param test_var: variable to differenciate between test and url call - :type test_var: boolean - - main function to start graph generation - ''' - - # ERROR-Handling doi_array = NULL - if (len(doi_input_list) == 0): - print("Error, no input data") - - # ERROR- if a negative number is entered for height - if (search_height < 0): - print("Error, search_height of search must be positive") - - # ERROR- if a negative number is entered for depth - if (search_depth < 0): - print("Error, search_depth of search must be positive") - - - # creates empty lists to save nodes and edges - global nodes, edges - nodes = [] - edges = [] - - # initializes nodes/edges from input and gets a list with publication objects for citations and references returned - references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) - - # function calls to begin recursive processing up to max depth/height - process_citations_rec(citations_obj_list, 1, search_height, test_var) - process_references_rec(references_obj_list, 1, search_depth, test_var) - - # adds edges between reference group and citation group of known publications - complete_inner_edges(test_var) - - # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes,edges, test_var) - - return(nodes,edges) diff --git a/verarbeitung/__init__.py b/verarbeitung/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/__pycache__/Processing.cpython-36.pyc b/verarbeitung/__pycache__/Processing.cpython-36.pyc deleted file mode 100644 index 203d7b80e86c1714067062b8efd787fd591d82e5..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing.cpython-38.pyc b/verarbeitung/__pycache__/Processing.cpython-38.pyc deleted file mode 100644 index 1906483bf8be5183bfad874433aca0cd4a75a8fa..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc deleted file mode 100644 index a86e804167e4c2fdf2e9a1f4b354ef6506381740..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc b/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc deleted file mode 100644 index 9ce1023e6ea54e1b04b37ad5a1fd08115d5f52a4..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/Processing_pub_objs_only.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-36.pyc b/verarbeitung/__pycache__/input_fj.cpython-36.pyc deleted file mode 100644 index 04312c91f0a7675651e99a2a6c10a2c9da146758..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-38.pyc b/verarbeitung/__pycache__/input_fj.cpython-38.pyc deleted file mode 100644 index 515ab99c01a5ce78bb5bb6de554a4dae3ffe4b4b..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_fj.cpython-39.pyc b/verarbeitung/__pycache__/input_fj.cpython-39.pyc deleted file mode 100644 index 175f9ebbfdf5f3313196b4f10aa01dc2e8e20509..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_fj.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-36.pyc b/verarbeitung/__pycache__/input_test.cpython-36.pyc deleted file mode 100644 index 956c497bc38c9471bc9e7cb52a870cd1174cceee..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-38.pyc b/verarbeitung/__pycache__/input_test.cpython-38.pyc deleted file mode 100644 index 35b42ad4c56ad3a65838c0ccc2716b9aea899b5b..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc deleted file mode 100644 index 550ce300289531fa2018d232b0f3bbf9986d3cd0..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/input_test.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-36.pyc b/verarbeitung/__pycache__/json_demo.cpython-36.pyc deleted file mode 100644 index 5c3a9cbc487bf90532dafb33e6c3ae84f57f6758..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc deleted file mode 100644 index 4daf8e7b50fdad59d6cac52e92a9ab3bd02a395f..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-38.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc deleted file mode 100644 index 3a90cf68398464b1132fc65f1a598bac313a51a3..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and /dev/null differ diff --git a/verarbeitung/__pycache__/unittest.cpython-36.pyc b/verarbeitung/__pycache__/unittest.cpython-36.pyc deleted file mode 100644 index 245eb7f9be9221daa930d9fa83c77368ba463af7..0000000000000000000000000000000000000000 Binary files a/verarbeitung/__pycache__/unittest.cpython-36.pyc and /dev/null differ diff --git a/verarbeitung/construct_new_graph/Processing.py b/verarbeitung/construct_new_graph/Processing.py new file mode 100644 index 0000000000000000000000000000000000000000..a9a85f93c62fca0bee3e490edd694ffbab336c35 --- /dev/null +++ b/verarbeitung/construct_new_graph/Processing.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +""" +Functions to generate a graph representing citations between multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error +sys.path.append("../") + +from input.publication import Publication +from verarbeitung.get_pub_from_input import get_pub +from .export_to_json import output_to_json +from .add_citations_rec import add_citations, create_global_lists_cit +from .add_references_rec import add_references, create_global_lists_ref + + +def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): + ''' + :param doi_input_list: input list of doi from UI + :type doi_input_list: List[String] + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + adds input dois to nodes and retrieves citations and references for input publications + ''' + + # saves found citations and references in lists + references_pub_obj_list = [] + citations_pub_obj_list = [] + + for pub_doi in doi_input_list: #iterates over every incoming doi + pub = get_pub(pub_doi, test_var) + if (type(pub) != Publication): + print(pub) + continue + + # checks if publication already exists in nodes + not_in_nodes = True #boolean value to check if a node already exists in the set of nodes + for node in nodes: #iterates over every node in the set of nodes + if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set + not_in_nodes = False #false --> node will not be created + break + if (not_in_nodes): #there is no node with this doi in the set + nodes.append(pub) #appends Publication Object + pub.group = "input" + else: + doi_input_list.remove(pub_doi) #deletes the doi-dublicate from input list + + # inserts references as publication objects into list and + # inserts first depth references into nodes/edges if maximum search depth > 0 + for reference in create_global_lists_ref(nodes, edges, pub, 0, search_depth_max, test_var): + references_pub_obj_list.append(reference) + + # inserts citations as publication objects into list and + # inserts first height citations into nodes if maximum search height > 0 + for citation in create_global_lists_cit(nodes, edges, pub, 0, search_height_max, test_var): + citations_pub_obj_list.append(citation) + + return(references_pub_obj_list, citations_pub_obj_list) + + + +def complete_inner_edges(test_var): + ''' + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + completes inner edges between nodes of group height and depth + ''' + + for node in nodes: + if (node.group == "depth"): + for citation in node.citations: + for cit in nodes: + if (citation == cit.doi_url and [citation, node.doi_url] not in edges): + edges.append([citation, node.doi_url]) + if (node.group == "height"): + for reference in node.references: + for ref in nodes: + if (reference == ref.doi_url and [node.doi_url, reference] not in edges): + edges.append([node.doi_url,reference]) + + +def process_main(doi_input_list, search_height, search_depth, test_var = False): + ''' + :param doi_input_list: input list of doi from UI + :type doi_input_list: list of strings + + :param search_height: maximum height to search for citations + :type search_height: int + + :param search_depth: maximum depth to search for references + :type search_depth: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + main function to start graph generation + ''' + + # ERROR-Handling doi_array = NULL + if (len(doi_input_list) == 0): + print("Error, no input data") + + # ERROR- if a negative number is entered for height + if (search_height < 0): + print("Error, search_height of search must be positive") + + # ERROR- if a negative number is entered for depth + if (search_depth < 0): + print("Error, search_depth of search must be positive") + + + # creates empty lists to save nodes and edges + global nodes, edges + nodes = [] + edges = [] + + # initializes nodes/edges from input and gets a list with publication objects for citations and references returned + references_obj_list, citations_obj_list = initialize_nodes_list(doi_input_list,search_depth, search_height, test_var) + + # function calls to begin recursive processing up to max depth/height + add_citations(nodes, edges, citations_obj_list, 1, search_height, test_var) + add_references(nodes, edges, references_obj_list, 1, search_depth, test_var) + + # adds edges between reference group and citation group of known publications + complete_inner_edges(test_var) + + # calls a skript to save nodes and edges of graph in .json file + output_to_json(nodes,edges, test_var) + + return(nodes,edges) diff --git a/verarbeitung/construct_new_graph/__init__.py b/verarbeitung/construct_new_graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py new file mode 100644 index 0000000000000000000000000000000000000000..6dd815f732b8fe17525f119ef8b8339794dd0856 --- /dev/null +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +""" +Functions to add citations recursivly for multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error +sys.path.append("../../") + +from input.publication import Publication +from verarbeitung.get_pub_from_input import get_pub + +def create_global_lists_cit(input_nodes, input_edges, pub, search_height, search_height_max, test_var): + ''' + :param input_nodes: list of nodes from Processing + :type input_nodes: List[Publication] + + :param input_edges: list of edges from Processing + :type input_edges: List[String, String] + + :param pub: Publication which citations will be added + :type pub: Publication + + :param search_height: current height to search for citations + :type search_height_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to create nodes and edges and call create_graph_structure_citations + ''' + + global nodes, edges + nodes = input_nodes + edges = input_edges + + return create_graph_structure_citations(pub, search_height, search_height_max, test_var) + + +def create_graph_structure_citations(pub, search_height, search_height_max, test_var): + ''' + :param pub: publication which citations will be added + :type pub: Publication + + :param search_height: current height to search for citations + :type search_height_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + adds a node for every citing publication unknown + adds edges to added citations + ''' + + citations_pub_obj_list = [] + for citation in pub.citations: + not_in_nodes = True + for node in nodes: # checks every citation for duplication + if (citation == node.doi_url): + not_in_nodes = False + break + if (not_in_nodes): + if (search_height < search_height_max): #checks if its a test and chooses input function accordingly + citation_pub_obj = get_pub(citation, test_var) + if (type(citation_pub_obj) != Publication): + print(pub) + continue + + citation_pub_obj.group = "height" + nodes.append(citation_pub_obj) + edges.append([citation_pub_obj.doi_url,pub.doi_url]) + citations_pub_obj_list.append(citation_pub_obj) + + # adds only edge if citation already exists + elif [citation,pub.doi_url] not in edges: + edges.append([citation,pub.doi_url]) + return citations_pub_obj_list + + +def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): + ''' + :param citations_pub_obj_list: list of publications which citations will be added + :type citations_pub_obj_list: List[Publication] + + :param search_height: current height to search for citations + :type search_height_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + recursive function to implement depth-first-search on citations + ''' + + # adds next level to nodes/edges + for pub in citations_pub_obj_list: + new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) + + # If the maximum height has not yet been reached, calls function recursivly with increased height + if (search_height < search_height_max): + process_citations_rec(new_citation_pub_obj_list, search_height+1, search_height_max, test_var) + + +def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_height, search_height_max, test_var): + ''' + :param input_nodes: list of nodes from Processing + :type input_nodes: List[Publication] + + :param input_edges: list of edges from Processing + :type input_edges: List[String, String] + + :param citations_pub_obj_list: list of publications which citations will be added + :type citations_pub_obj_list: List[Publication] + + :param search_height: current height to search for citations + :type search_height_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to call recursive depth-first-search of citations + ''' + global nodes, edges + nodes = input_nodes + edges = input_edges + + process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var) + return(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/add_references_rec.py b/verarbeitung/construct_new_graph/add_references_rec.py new file mode 100644 index 0000000000000000000000000000000000000000..320bc217094ef6c277ad39a46c6f1e2b39b9b16c --- /dev/null +++ b/verarbeitung/construct_new_graph/add_references_rec.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +""" +Functions to add references recursivly for multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error +sys.path.append("../../") + +from input.publication import Publication +from verarbeitung.get_pub_from_input import get_pub + +def create_global_lists_ref(input_nodes, input_edges, pub, search_depth, search_depth_max, test_var): + ''' + :param input_nodes: list of nodes from Processing + :type input_nodes: List[Publication] + + :param input_edges: list of edges from Processing + :type input_edges: List[String, String] + + :param pub: Publication which references will be added + :type pub: Publication + + :param search_height: current height to search for references + :type search_height_max: int + + :param search_height_max: maximum height to search for references + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to create nodes and edges and call create_graph_structure_references + ''' + + global nodes, edges + nodes = input_nodes + edges = input_edges + + return create_graph_structure_references(pub, search_depth, search_depth_max, test_var) + + +def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): + ''' + :param pub: publication which references will be added + :type pub: Publication + + :param search_depth: current depth to search for references + :type search_depth: int + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + adds a node for every referenced publication unknown + adds edges to added references + ''' + + references_pub_obj_list = [] + for reference in pub.references: #iterates over the references of the considered paper + not_in_nodes = True #boolean Value to ensure that there will be no dublicates in the set of nodes + for node in nodes: #iterates over all nodes in set of nodes # + if (reference == node.doi_url): #determines that the node already exists + not_in_nodes = False #boolean false --> node will not be created + break + if (not_in_nodes): #checks that there is no node with this doi + if (search_depth < search_depth_max): #checks that the recursion step is smaller than the limit + reference_pub_obj = get_pub(reference, test_var) + if (type(reference_pub_obj) != Publication): + print(pub) + continue + + reference_pub_obj.group = "depth" + nodes.append(reference_pub_obj) # appends the object to the set of nodes + edges.append([pub.doi_url,reference_pub_obj.doi_url]) #appends the edge to the set of edges + references_pub_obj_list.append(reference_pub_obj) #appends the node to the set of references + + # adds edge only if citation already exists + elif [pub.doi_url,reference] not in edges: + edges.append([pub.doi_url,reference]) + return references_pub_obj_list + + +def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): + ''' + :param references_pub_obj_list: list of publications which references will be added + :type references_pub_obj_list: List[Publication] + + :param search_depth: current depth to search for references + :type search_depth: int + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + recursive function to implement height-first-search on references + ''' + + # adds next level to nodes/edges + for pub in references_pub_obj_list: + new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) + + # If the maximum height has not yet been reached, calls function recursivly with increased height + if (search_depth < search_depth_max): + process_references_rec(new_reference_pub_obj_list, search_depth+1, search_depth_max, test_var) + + +def add_references(input_nodes, input_edges, references_pub_obj_list, search_height, search_height_max, test_var): + ''' + :param input_nodes: list of nodes from Processing + :type input_nodes: List[Publication] + + :param input_edges: list of edges from Processing + :type input_edges: List[String, String] + + :param references_pub_obj_list: list of publications which references will be added + :type references_pub_obj_list: List[Publication] + + :param search_height: current height to search for references + :type search_height_max: int + + :param search_height_max: maximum height to search for references + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + function to call recursive depth-first-search of references + ''' + global nodes, edges + nodes = input_nodes + edges = input_edges + + process_references_rec(references_pub_obj_list, search_height, search_height_max, test_var) + return(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/json_demo.py b/verarbeitung/construct_new_graph/export_to_json.py similarity index 100% rename from verarbeitung/json_demo.py rename to verarbeitung/construct_new_graph/export_to_json.py diff --git a/verarbeitung/dev_files/__init__.py b/verarbeitung/dev_files/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/print_graph_test.py b/verarbeitung/dev_files/print_graph_test.py similarity index 94% rename from verarbeitung/print_graph_test.py rename to verarbeitung/dev_files/print_graph_test.py index efb62ec6985338e1eaf8fee099a4b526bde8d8fc..f5e3a03696440d5dabded16f0a3253de6bc08f09 100644 --- a/verarbeitung/print_graph_test.py +++ b/verarbeitung/dev_files/print_graph_test.py @@ -19,9 +19,9 @@ import sys #sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') sys.path.append("../") from input.interface import InputInterface as Input -from Processing import process_main -from import_from_json import input_from_json -from update_graph import check_graph_updates +from verarbeitung.construct_new_graph.Processing import process_main +from verarbeitung.update_graph.import_from_json import input_from_json +from update_graph.update_graph import check_graph_updates # a function to print nodes and edges from a graph def print_graph(nodes, edges): diff --git a/verarbeitung/get_pub_from_input.py b/verarbeitung/get_pub_from_input.py index a8636a4fcf755943d170abbf84e50fce937a67c2..9cf4b9cc33ef9ce6a8b8dd3154fda4db2356da34 100644 --- a/verarbeitung/get_pub_from_input.py +++ b/verarbeitung/get_pub_from_input.py @@ -19,7 +19,7 @@ from pathlib import Path sys.path.append("../") from input.interface import InputInterface as Input -from input_test import input_test_func +from verarbeitung.test.input_test import input_test_func def get_pub(pub_doi, test_var): diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json deleted file mode 100644 index de3ddf528cc6224a68946f5b7fab56276120f741..0000000000000000000000000000000000000000 --- a/verarbeitung/json_text.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "input", "citations": 5}], "links": []} \ No newline at end of file diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/Processing_unittest.py b/verarbeitung/test/Processing_unittest.py similarity index 94% rename from verarbeitung/Processing_unittest.py rename to verarbeitung/test/Processing_unittest.py index def144251db43f2e7a3f7e072ebe7898ca2a7c7d..47dd268885d14a254ced584c24d5a9e4dcafb0e8 100644 --- a/verarbeitung/Processing_unittest.py +++ b/verarbeitung/test/Processing_unittest.py @@ -1,119 +1,123 @@ -import unittest - -from Processing import process_main -from import_from_json import input_from_json -from update_graph import check_graph_updates - -class ProcessingTest(unittest.TestCase): - maxDiff = None - - - def testCycle(self): - nodes, edges = process_main(['doiz1'],1,1,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) - - nodes, edges = process_main(['doiz1'],2,2,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) - self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) - - #def testBigCycle(self): - - #def testEmptyHeight(self): - - #def testEmptyDepth(self): - - def testEmptyDepthHeight(self): - nodes, edges = process_main(['doi1'],0,0,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi1']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi1', 'doi2'],0,0,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doi1','doi2']) - self.assertCountEqual(edges, [['doi1', 'doi2']]) - - nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) - self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) - - - def testInnerEdges(self): - nodes, edges = process_main(['doi_ie1'],1,1,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) - self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) - - def testRightHeight(self): - nodes, edges = process_main(['doi_h01'],1,0,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_h01']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi_h02'],1,0,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) - self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - - nodes, edges = process_main(['doi_h02'],2,0,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) - self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) - - def testRightDepth(self): - nodes, edges = process_main(['doi_d01'],0,1,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_d01']) - self.assertCountEqual(edges, []) - - nodes, edges = process_main(['doi_d02'],0,1,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) - self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - - nodes, edges = process_main(['doi_d02'],0,2,True) - doi_nodes = keep_only_dois(nodes) - self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) - self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) - - def test_import_from_json(self): - nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True) - nodes_new, edges_new = input_from_json('test_output.json') - self.assertCountEqual(nodes_old,nodes_new) - self.assertCountEqual(edges_old, edges_new) - - def test_deleted_input_dois(self): - nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True) - nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True) - nodes_new_both, edges_new_both = input_from_json('test_output.json') - nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True) - self.assertCountEqual(nodes_old_single,nodes_new_single) - self.assertCountEqual(edges_old_single, edges_new_single) - - nodes_old_single, edges_old_single = process_main(['doi_cg_i'],3,3,True) - nodes_old_two, edges_old_two = process_main(['doi_lg_1_i','doi_cg_i'],3,3,True) - nodes_old_three, edges_old_three = process_main(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) - - - - - -def keep_only_dois(nodes): - ''' - :param nodes: input list of nodes of type Publication - :type nodes: List[Publication] - - gets nodes of type pub and return only their doi - ''' - doi_list = [] - for node in nodes: - doi_list.append(node.doi_url) - return doi_list - - -if __name__ == "__main__": +import unittest + +import sys +from pathlib import Path +sys.path.append("../") + +from verarbeitung.construct_new_graph.Processing import process_main +from verarbeitung.update_graph.import_from_json import input_from_json +from verarbeitung.update_graph.update_graph import check_graph_updates + +class ProcessingTest(unittest.TestCase): + maxDiff = None + + + def testCycle(self): + nodes, edges = process_main(['doiz1'],1,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) + + nodes, edges = process_main(['doiz1'],2,2,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) + self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) + + #def testBigCycle(self): + + #def testEmptyHeight(self): + + #def testEmptyDepth(self): + + def testEmptyDepthHeight(self): + nodes, edges = process_main(['doi1'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi1']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi1', 'doi2'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2']) + self.assertCountEqual(edges, [['doi1', 'doi2']]) + + nodes, edges = process_main(['doi1', 'doi2', 'doi3'],0,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) + self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) + + + def testInnerEdges(self): + nodes, edges = process_main(['doi_ie1'],1,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) + self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) + + def testRightHeight(self): + nodes, edges = process_main(['doi_h01'],1,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h01']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi_h02'],1,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) + self.assertCountEqual(edges, [['doi_h1','doi_h02']]) + + nodes, edges = process_main(['doi_h02'],2,0,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) + self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) + + def testRightDepth(self): + nodes, edges = process_main(['doi_d01'],0,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d01']) + self.assertCountEqual(edges, []) + + nodes, edges = process_main(['doi_d02'],0,1,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) + self.assertCountEqual(edges, [['doi_d02','doi_d1']]) + + nodes, edges = process_main(['doi_d02'],0,2,True) + doi_nodes = keep_only_dois(nodes) + self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) + self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) + + def test_import_from_json(self): + nodes_old, edges_old = process_main(['doi_lg_1_i'],2,2,True) + nodes_new, edges_new = input_from_json('test_output.json') + self.assertCountEqual(nodes_old,nodes_new) + self.assertCountEqual(edges_old, edges_new) + + def test_deleted_input_dois(self): + nodes_old_single, edges_old_single = process_main(['doi_lg_1_i'],2,2,True) + nodes_old_both, edges_old_both = process_main(['doi_lg_1_i','doi_lg_2_i'],2,2,True) + nodes_new_both, edges_new_both = input_from_json('test_output.json') + nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, True) + self.assertCountEqual(nodes_old_single,nodes_new_single) + self.assertCountEqual(edges_old_single, edges_new_single) + + nodes_old_single, edges_old_single = process_main(['doi_cg_i'],3,3,True) + nodes_old_two, edges_old_two = process_main(['doi_lg_1_i','doi_cg_i'],3,3,True) + nodes_old_three, edges_old_three = process_main(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) + + + + + +def keep_only_dois(nodes): + ''' + :param nodes: input list of nodes of type Publication + :type nodes: List[Publication] + + gets nodes of type pub and return only their doi + ''' + doi_list = [] + for node in nodes: + doi_list.append(node.doi_url) + return doi_list + + +if __name__ == "__main__": unittest.main() \ No newline at end of file diff --git a/verarbeitung/test/__init__.py b/verarbeitung/test/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/input_test.py b/verarbeitung/test/input_test.py similarity index 98% rename from verarbeitung/input_test.py rename to verarbeitung/test/input_test.py index 4c9bf317c20a41cc8ba134334d92cfa2da8f8d99..928c8b401fc2377b05cb5fe3aa6500ea0c0c0261 100644 --- a/verarbeitung/input_test.py +++ b/verarbeitung/test/input_test.py @@ -1,116 +1,116 @@ -import sys -sys.path.append("../") - -from input.publication import Publication, Citation - - -def input_test_func(pub_doi): - ''' - :param pub_doi: pub doi to find publication in list_of_arrays - :type pub_doi: String - - returns the publication class for given doi - ''' - - for array in list_of_arrays: - if pub_doi == array[0]: - pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) - return pub - - -def cit(list_doi): - ''' - :param list_doi list of citation dois to get their Citation Class - :type list_doi: List[String] - - returns a list of citations objects for given doi list - ''' - - cits = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - cits.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return cits - -def ref(list_doi): - ''' - :param list_doi list of reference dois to get their Reference Class - :type list_doi: List[String] - - returns a list of reference objects for given doi list - ''' - - refs = [] - for doi_url in list_doi: - for array in list_of_arrays: - if doi_url == array[0]: - refs.append(Citation(array[0], array[1], array[2], array[3], array[4])) - return refs - - - -beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] -beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] -beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] - -zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']] -zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']] - -inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] -inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']] -inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []] - -right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []] -right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']] -right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']] -right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']] -right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []] - -right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []] -right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []] -right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []] -right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []] -right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []] - -large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] -large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []] -large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12','doi_cg_i'], []] -large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']] -large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']] -large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']] -large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']] -large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']] -large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']] -large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']] -large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12','doi_cg_d11']] - -large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] -large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] -large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']] -large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] -large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i','doi_cg_i'], ['doi_lg_2_h21','doi_lg_2_h22']] -large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] -large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12','doi_cg_i']] -large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']] -large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']] -large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']] -large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] -large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] - -crossed_graph_h21 = ['doi_cg_h21', 'title_cg_h21', ['contributor_cg_h21'], 'journal_cg_h21', 'date_cg_h21', ['subject_cg_h21'], ['doi_cg_h11'], []] -crossed_graph_h22 = ['doi_cg_h22', 'title_cg_h22', ['contributor_cg_h22'], 'journal_cg_h22', 'date_cg_h22', ['subject_cg_h22'], ['doi_cg_h11'], []] -crossed_graph_h11 = ['doi_cg_h11', 'title_cg_h11', ['contributor_cg_h11'], 'journal_cg_h11', 'date_cg_h11', ['subject_cg_h11'], ['doi_cg_i'], ['doi_cg_h21','doi_cg_h22']] -crossed_graph_i = ['doi_cg_i', 'title_cg_i', ['contributor_cg_i'], 'journal_cg_i', 'date_cg_i', ['subject_cg_i'], ['doi_lg_2_i','doi_cg_d11','doi_cg_d12'], ['doi_lg_1_h23','doi_cg_h11','doi_lg_2_h11']] -crossed_graph_d11 = ['doi_cg_d11', 'title_cg_d11', ['contributor_cg_d11'], 'journal_cg_d11', 'date_cg_d11', ['subject_cg_d11'], ['doi_lg_1_d23','doi_cg_d21'], ['doi_cg_i']] -crossed_graph_d12 = ['doi_cg_d12', 'title_cg_d12', ['contributor_cg_d12'], 'journal_cg_d12', 'date_cg_d12', ['subject_cg_d12'], ['doi_cg_d22'], ['doi_cg_i']] -crossed_graph_d21 = ['doi_cg_d21', 'title_cg_d21', ['contributor_cg_d21'], 'journal_cg_d21', 'date_cg_d21', ['subject_cg_d21'], [], ['doi_cg_d11']] -crossed_graph_d22 = ['doi_cg_d22', 'title_cg_d22', ['contributor_cg_d22'], 'journal_cg_d22', 'date_cg_d22', ['subject_cg_d22'], [], ['doi_cg_d12']] - - -list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, - right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, - large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, - large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, - large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23, crossed_graph_h21, crossed_graph_h22, crossed_graph_h11, - crossed_graph_i, crossed_graph_d11, crossed_graph_d12, crossed_graph_d21, crossed_graph_d22] +import sys +sys.path.append("../") + +from input.publication import Publication, Citation + + +def input_test_func(pub_doi): + ''' + :param pub_doi: pub doi to find publication in list_of_arrays + :type pub_doi: String + + returns the publication class for given doi + ''' + + for array in list_of_arrays: + if pub_doi == array[0]: + pub = Publication(array[0], array[1], array[2], array[3], array[4], array[5], array[6], array[7]) + return pub + + +def cit(list_doi): + ''' + :param list_doi list of citation dois to get their Citation Class + :type list_doi: List[String] + + returns a list of citations objects for given doi list + ''' + + cits = [] + for doi_url in list_doi: + for array in list_of_arrays: + if doi_url == array[0]: + cits.append(Citation(array[0], array[1], array[2], array[3], array[4])) + return cits + +def ref(list_doi): + ''' + :param list_doi list of reference dois to get their Reference Class + :type list_doi: List[String] + + returns a list of reference objects for given doi list + ''' + + refs = [] + for doi_url in list_doi: + for array in list_of_arrays: + if doi_url == array[0]: + refs.append(Citation(array[0], array[1], array[2], array[3], array[4])) + return refs + + + +beispiel1 = ['doi1', 'title1', ['contributor1'], 'journal1', 'date1', ['subject1'], ['doi2'], ['doi3']] +beispiel2 = ['doi2', 'title2', ['contributor2'], 'journal2', 'date2', ['subject2'], [], ['doi1']] +beispiel3 = ['doi3', 'title3', ['contributor3'], 'journal3', 'date3', ['subject3'], ['doi1'], []] + +zyklus1 = ['doiz1', 'titlez1', ['contributorz1.1', 'contributorz1.2'], 'journalz1', 'datez1', ['subjectz1'], ['doiz2'], ['doiz2']] +zyklus2 = ['doiz2', 'titlez2', ['contributorz2.1', 'contributorz2.2'], 'journalz2', 'datez2', ['subjectz1'], ['doiz1'], ['doiz1']] + +inner_edge1 = ['doi_ie1', 'title_ie1', ['contributor_ie1.1', 'contributor_ie1.2'], 'journal_ie1', 'date_ie1', ['subject_ie1'], ['doi_ie2'], ['doi_ie3']] +inner_edge2 = ['doi_ie2', 'title_ie2', ['contributor_ie2.1', 'contributor_ie2.2'], 'journal_ie2', 'date_ie2', ['subject_ie2'], [], ['doi_ie1','doi_ie3']] +inner_edge3 = ['doi_ie3', 'titlez_ie3', ['contributor_ie3.1', 'contributor_ie3.2'], 'journal_ie3', 'date_ie3', ['subject_ie3'], ['doi_ie1','doi_ie2'], []] + +right_height01 = ['doi_h01', 'title_h01', ['contributor_h01'], 'journal_h01', 'date_h01', ['subject_h01'], [], []] +right_height02 = ['doi_h02', 'title_h02', ['contributor_h02'], 'journal_h02', 'date_h02', ['subject_h02'], [], ['doi_h1']] +right_height1 = ['doi_h1', 'title_h1', ['contributor_h1'], 'journal_h1', 'date_h1', ['subject_h1'], [], ['doi_h2']] +right_height2 = ['doi_h2', 'title_h2', ['contributor_h2'], 'journal_h2', 'date_h2', ['subject_h2'], [], ['doi_h3']] +right_height3 = ['doi_h3', 'title_h3', ['contributor_h3'], 'journal_h3', 'date_h3', ['subject_h3'], [], []] + +right_depth01 = ['doi_d01', 'title_d01', ['contributor_d01'], 'journal_d01', 'date_d01', ['subject_d01'], [], []] +right_depth02 = ['doi_d02', 'title_d02', ['contributor_d02'], 'journal_d02', 'date_d02', ['subject_d01'], ['doi_d1'], []] +right_depth1 = ['doi_d1', 'title_d1', ['contributor_d1'], 'journal_d1', 'date_d1', ['subject_d1'], ['doi_d2'], []] +right_depth2 = ['doi_d2', 'title_d2', ['contributor_d2'], 'journal_d2', 'date_d2', ['subject_d2'], ['doi_d3'], []] +right_depth3 = ['doi_d3', 'title_d3', ['contributor_d3'], 'journal_d3', 'date_d3', ['subject_d3'], [], []] + +large_graph_1_h21 = ['doi_lg_1_h21', 'title_lg_1_h21', ['contributor_lg_1_h21'], 'journal_lg_1_h21', 'date_lg_1_h21', ['subject_lg_1_h21'], ['doi_lg_1_h11'], []] +large_graph_1_h22 = ['doi_lg_1_h22', 'title_lg_1_h22', ['contributor_lg_1_h22'], 'journal_lg_1_h22', 'date_lg_1_h22', ['subject_lg_1_h22'], ['doi_lg_1_h11','doi_lg_1_h12'], []] +large_graph_1_h23 = ['doi_lg_1_h23', 'title_lg_1_h23', ['contributor_lg_1_h23'], 'journal_lg_1_h23', 'date_lg_1_h23', ['subject_lg_1_h23'], ['doi_lg_1_h12','doi_cg_i'], []] +large_graph_1_h11 = ['doi_lg_1_h11', 'title_lg_1_h11', ['contributor_lg_1_h11'], 'journal_lg_1_h11', 'date_lg_1_h11', ['subject_lg_1_h11'], ['doi_lg_1_h12','doi_lg_1_i'], ['doi_lg_1_h21','doi_lg_1_h22']] +large_graph_1_h12 = ['doi_lg_1_h12', 'title_lg_1_h12', ['contributor_lg_1_h12'], 'journal_lg_1_h12', 'date_lg_1_h12', ['subject_lg_1_h12'], ['doi_lg_1_i','doi_lg_1_d12'], ['doi_lg_1_h22','doi_lg_1_h23']] +large_graph_1_i = ['doi_lg_1_i' , 'title_lg_1_i' , ['contributor_lg_1_i'] , 'journal_lg_1_i' , 'date_lg_1_i' , ['subject_lg_1_i'] , ['doi_lg_1_d11','doi_lg_1_d12'], ['doi_lg_1_h11','doi_lg_1_h12']] +large_graph_1_d11 = ['doi_lg_1_d11', 'title_lg_1_d11', ['contributor_lg_1_d11'], 'journal_lg_1_d11', 'date_lg_1_d11', ['subject_lg_1_d11'], ['doi_lg_1_d21','doi_lg_1_d22'], ['doi_lg_1_i']] +large_graph_1_d12 = ['doi_lg_1_d12', 'title_lg_1_d12', ['contributor_lg_1_d12'], 'journal_lg_1_d12', 'date_lg_1_d12', ['subject_lg_1_d12'], ['doi_lg_1_d23'], ['doi_lg_1_h12','doi_lg_1_i']] +large_graph_1_d21 = ['doi_lg_1_d21', 'title_lg_1_d21', ['contributor_lg_1_d21'], 'journal_lg_1_d21', 'date_lg_1_d21', ['subject_lg_1_d21'], ['doi_lg_1_d22'], ['doi_lg_1_d11','doi_lg_1_d22']] +large_graph_1_d22 = ['doi_lg_1_d22', 'title_lg_1_d22', ['contributor_lg_1_d22'], 'journal_lg_1_d22', 'date_lg_1_d22', ['subject_lg_1_d22'], ['doi_lg_1_d21'], ['doi_lg_1_d11','doi_lg_1_d21']] +large_graph_1_d23 = ['doi_lg_1_d23', 'title_lg_1_d23', ['contributor_lg_1_d23'], 'journal_lg_1_d23', 'date_lg_1_d23', ['subject_lg_1_d23'], [], ['doi_lg_1_d12','doi_cg_d11']] + +large_graph_2_h21 = ['doi_lg_2_h21', 'title_lg_2_h21', ['contributor_lg_2_h21'], 'journal_lg_2_h21', 'date_lg_2_h21', ['subject_lg_2_h21'], ['doi_lg_2_h11'], []] +large_graph_2_h22 = ['doi_lg_2_h22', 'title_lg_2_h22', ['contributor_lg_2_h22'], 'journal_lg_2_h22', 'date_lg_2_h22', ['subject_lg_2_h22'], ['doi_lg_2_h11'], []] +large_graph_2_h23 = ['doi_lg_2_h23', 'title_lg_2_h23', ['contributor_lg_2_h23'], 'journal_lg_2_h23', 'date_lg_2_h23', ['subject_lg_2_h23'], ['doi_lg_2_h12','doi_lg_2_h24'], ['doi_lg_2_h24']] +large_graph_2_h24 = ['doi_lg_2_h24', 'title_lg_2_h24', ['contributor_lg_2_h24'], 'journal_lg_2_h24', 'date_lg_2_h24', ['subject_lg_2_h24'], ['doi_lg_2_h12','doi_lg_2_h23','doi_lg_2_d12'], ['doi_lg_2_h23']] +large_graph_2_h11 = ['doi_lg_2_h11', 'title_lg_2_h11', ['contributor_lg_2_h11'], 'journal_lg_2_h11', 'date_lg_2_h11', ['subject_lg_2_h11'], ['doi_lg_2_i','doi_cg_i'], ['doi_lg_2_h21','doi_lg_2_h22']] +large_graph_2_h12 = ['doi_lg_2_h12', 'title_lg_2_h12', ['contributor_lg_2_h12'], 'journal_lg_2_h12', 'date_lg_2_h12', ['subject_lg_2_h12'], ['doi_lg_2_i'], ['doi_lg_2_h23','doi_lg_2_h24']] +large_graph_2_i = ['doi_lg_2_i' , 'title_lg_2_i' , ['contributor_lg_2_i'] , 'journal_lg_2_i' , 'date_lg_2_i' , ['subject_lg_2_i'] , ['doi_lg_2_d11','doi_lg_2_d12'], ['doi_lg_2_h11','doi_lg_2_h12','doi_cg_i']] +large_graph_2_d11 = ['doi_lg_2_d11', 'title_lg_2_d11', ['contributor_lg_2_d11'], 'journal_lg_2_d11', 'date_lg_2_d11', ['subject_lg_2_d11'], [], ['doi_lg_2_i']] +large_graph_2_d12 = ['doi_lg_2_d12', 'title_lg_2_d12', ['contributor_lg_2_d12'], 'journal_lg_2_d12', 'date_lg_2_d12', ['subject_lg_2_d12'], ['doi_lg_2_d21','doi_lg_2_d22'], ['doi_lg_2_h24','doi_lg_2_i']] +large_graph_2_d21 = ['doi_lg_2_d21', 'title_lg_2_d21', ['contributor_lg_2_d21'], 'journal_lg_2_d21', 'date_lg_2_d21', ['subject_lg_2_d21'], [], ['doi_lg_2_d12']] +large_graph_2_d22 = ['doi_lg_2_d22', 'title_lg_2_d22', ['contributor_lg_2_d22'], 'journal_lg_2_d22', 'date_lg_2_d22', ['subject_lg_2_d22'], [], ['doi_lg_2_d12']] +large_graph_2_d23 = ['doi_lg_2_d23', 'title_lg_2_d23', ['contributor_lg_2_d23'], 'journal_lg_2_d23', 'date_lg_2_d23', ['subject_lg_2_d23'], [], ['doi_lg_2_d12']] + +crossed_graph_h21 = ['doi_cg_h21', 'title_cg_h21', ['contributor_cg_h21'], 'journal_cg_h21', 'date_cg_h21', ['subject_cg_h21'], ['doi_cg_h11'], []] +crossed_graph_h22 = ['doi_cg_h22', 'title_cg_h22', ['contributor_cg_h22'], 'journal_cg_h22', 'date_cg_h22', ['subject_cg_h22'], ['doi_cg_h11'], []] +crossed_graph_h11 = ['doi_cg_h11', 'title_cg_h11', ['contributor_cg_h11'], 'journal_cg_h11', 'date_cg_h11', ['subject_cg_h11'], ['doi_cg_i'], ['doi_cg_h21','doi_cg_h22']] +crossed_graph_i = ['doi_cg_i', 'title_cg_i', ['contributor_cg_i'], 'journal_cg_i', 'date_cg_i', ['subject_cg_i'], ['doi_lg_2_i','doi_cg_d11','doi_cg_d12'], ['doi_lg_1_h23','doi_cg_h11','doi_lg_2_h11']] +crossed_graph_d11 = ['doi_cg_d11', 'title_cg_d11', ['contributor_cg_d11'], 'journal_cg_d11', 'date_cg_d11', ['subject_cg_d11'], ['doi_lg_1_d23','doi_cg_d21'], ['doi_cg_i']] +crossed_graph_d12 = ['doi_cg_d12', 'title_cg_d12', ['contributor_cg_d12'], 'journal_cg_d12', 'date_cg_d12', ['subject_cg_d12'], ['doi_cg_d22'], ['doi_cg_i']] +crossed_graph_d21 = ['doi_cg_d21', 'title_cg_d21', ['contributor_cg_d21'], 'journal_cg_d21', 'date_cg_d21', ['subject_cg_d21'], [], ['doi_cg_d11']] +crossed_graph_d22 = ['doi_cg_d22', 'title_cg_d22', ['contributor_cg_d22'], 'journal_cg_d22', 'date_cg_d22', ['subject_cg_d22'], [], ['doi_cg_d12']] + + +list_of_arrays = [beispiel1, beispiel2, beispiel3, zyklus1, zyklus2, inner_edge1, inner_edge2, inner_edge3, + right_height01, right_height02, right_height1, right_height2, right_height3, right_depth01, right_depth02, right_depth1, right_depth2, right_depth3, + large_graph_1_h21, large_graph_1_h22, large_graph_1_h23, large_graph_1_h11, large_graph_1_h12, large_graph_1_i, large_graph_1_d11, large_graph_1_d12, + large_graph_1_d21, large_graph_1_d22, large_graph_1_d23, large_graph_2_h21, large_graph_2_h22, large_graph_2_h23, large_graph_2_h24, large_graph_2_h11, large_graph_2_h12, + large_graph_2_i, large_graph_2_d11, large_graph_2_d12, large_graph_2_d21, large_graph_2_d22, large_graph_2_d23, crossed_graph_h21, crossed_graph_h22, crossed_graph_h11, + crossed_graph_i, crossed_graph_d11, crossed_graph_d12, crossed_graph_d21, crossed_graph_d22] diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json index b013fbfb4c1ac100c93ae21dfe722b309c10a14b..8a15eeb45e260ad464757e4ce1f50f940553c406 100644 --- a/verarbeitung/test_output.json +++ b/verarbeitung/test_output.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 1}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "input", "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "depth", "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "height", "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "height", "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "height", "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "depth", "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "depth", "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]} \ No newline at end of file diff --git a/verarbeitung/Kanten_Vergleich.py b/verarbeitung/update_graph/Kanten_Vergleich.py similarity index 100% rename from verarbeitung/Kanten_Vergleich.py rename to verarbeitung/update_graph/Kanten_Vergleich.py diff --git a/verarbeitung/Knoten_Vergleich.py b/verarbeitung/update_graph/Knoten_Vergleich.py similarity index 100% rename from verarbeitung/Knoten_Vergleich.py rename to verarbeitung/update_graph/Knoten_Vergleich.py diff --git a/verarbeitung/update_graph/__init__.py b/verarbeitung/update_graph/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/verarbeitung/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py similarity index 79% rename from verarbeitung/connect_new_input.py rename to verarbeitung/update_graph/connect_new_input.py index b9167dca95d8f2c7175c3820428d366a78468eaa..c61b00a818012eebf8af48697e9247572cec83f1 100644 --- a/verarbeitung/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -1,67 +1,69 @@ -# -*- coding: utf-8 -*- -""" -Functions to update a graph representing citations between multiple ACS/Nature journals - -""" - -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" -__email__ = "cis-project2021@zbh.uni-hamburg.de" -__status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" - -import sys -from pathlib import Path -from os import error -sys.path.append("../") - -from import_form_json import input_from_json -from Processing import initialize_nodes_list, process_citations_rec, process_references_rec, complete_inner_edges, create_graph_structure_references, create_graph_structure_citations -from json_demo import output_to_json - -def connect_old_and_new_input(json_file, new_doi_list, search_height, search_depth, test_var = False): - global nodes, edges - nodes = [] - edges = [] - - nodes, edges = input_from_json(json_file) - - complete_changed_group_nodes(new_doi_list, search_height, search_depth, test_var) - - # initializes nodes/edges from input and gets a list with publication objects for citations and references returned - references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list,search_depth, search_height, test_var) - - # function calls to begin recursive processing up to max depth/height - process_citations_rec(citations_obj_list, 1, search_height, test_var) - process_references_rec(references_obj_list, 1, search_depth, test_var) - - # adds edges between reference group and citation group of known publications - complete_inner_edges(test_var) - - # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes,edges, test_var) - - return(nodes, edges) - - -def complete_changed_group_nodes(new_doi_list, search_height_max, search_depth_max, test_var): - changed_group_node_citations = [] - changed_group_node_references = [] - - for node in nodes: - if (node.group != "input") and (node.doi in new_doi_list): - node.group = "input" - - # inserts references as publication objects into list and - # inserts first depth references into nodes/edges if maximum search depth > 0 - for reference in create_graph_structure_references(node, 0, search_depth_max, test_var): - changed_group_node_references.append(reference) - - # inserts citations as publication objects into list and - # inserts first height citations into nodes if maximum search height > 0 - for citation in create_graph_structure_citations(node, 0, search_height_max, test_var): - changed_group_node_citations.append(citation) - +# -*- coding: utf-8 -*- +""" +Functions to update a graph representing citations between multiple ACS/Nature journals + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import sys +from pathlib import Path +from os import error +sys.path.append("../") + +from import_from_json import input_from_json +from verarbeitung.construct_new_graph.Processing import initialize_nodes_list, complete_inner_edges, create_graph_structure_references, create_graph_structure_citations +from verarbeitung.construct_new_graph.add_citations_rec import add_citations +from verarbeitung.construct_new_graph.add_references_rec import add_references +from verarbeitung.construct_new_graph.export_to_json import output_to_json + +def connect_old_and_new_input(json_file, new_doi_list, search_height, search_depth, test_var = False): + global nodes, edges + nodes = [] + edges = [] + + nodes, edges = input_from_json(json_file) + + complete_changed_group_nodes(new_doi_list, search_height, search_depth, test_var) + + # initializes nodes/edges from input and gets a list with publication objects for citations and references returned + references_obj_list, citations_obj_list = initialize_nodes_list(new_doi_list,search_depth, search_height, test_var) + + # function calls to begin recursive processing up to max depth/height + add_citations(nodes, edges, citations_obj_list, 1, search_height, test_var) + add_references(nodes, edges, references_obj_list, 1, search_depth, test_var) + + # adds edges between reference group and citation group of known publications + complete_inner_edges(test_var) + + # calls a skript to save nodes and edges of graph in .json file + output_to_json(nodes,edges, test_var) + + return(nodes, edges) + + +def complete_changed_group_nodes(new_doi_list, search_height_max, search_depth_max, test_var): + changed_group_node_citations = [] + changed_group_node_references = [] + + for node in nodes: + if (node.group != "input") and (node.doi in new_doi_list): + node.group = "input" + + # inserts references as publication objects into list and + # inserts first depth references into nodes/edges if maximum search depth > 0 + for reference in create_graph_structure_references(node, 0, search_depth_max, test_var): + changed_group_node_references.append(reference) + + # inserts citations as publication objects into list and + # inserts first height citations into nodes if maximum search height > 0 + for citation in create_graph_structure_citations(node, 0, search_height_max, test_var): + changed_group_node_citations.append(citation) + diff --git a/verarbeitung/import_from_json.py b/verarbeitung/update_graph/import_from_json.py similarity index 99% rename from verarbeitung/import_from_json.py rename to verarbeitung/update_graph/import_from_json.py index 11e5d82f29c7d254422da26fb5e723db24341585..748e541a0b128ec40b5b5c7e1dcb6df9998dd103 100644 --- a/verarbeitung/import_from_json.py +++ b/verarbeitung/update_graph/import_from_json.py @@ -16,7 +16,7 @@ __status__ = "Production" import json import sys -sys.path.append("../") +sys.path.append("../../") from input.publication import Publication, Citation diff --git a/verarbeitung/update_graph.py b/verarbeitung/update_graph/update_graph.py similarity index 95% rename from verarbeitung/update_graph.py rename to verarbeitung/update_graph/update_graph.py index 985345630f3388c4cdbcbd888a1e06674509f33a..0e4f52a69ba09901e2f12ae01e94b286cc603c93 100644 --- a/verarbeitung/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -17,12 +17,12 @@ __status__ = "Production" import sys from pathlib import Path from os import error -sys.path.append("../") +sys.path.append("../../") from input.publication import Publication -from get_pub_from_input import get_pub -from Knoten_Vergleich import doi_listen_vergleichen -from update_graph_del import delete_nodes_and_edges +from verarbeitung.get_pub_from_input import get_pub +from .Knoten_Vergleich import doi_listen_vergleichen +from .update_graph_del import delete_nodes_and_edges def get_old_input_dois(old_obj_input_list): diff --git a/verarbeitung/update_graph_del.py b/verarbeitung/update_graph/update_graph_del.py similarity index 97% rename from verarbeitung/update_graph_del.py rename to verarbeitung/update_graph/update_graph_del.py index 10ca42b806eeaa08e2e7a9492cc9dadd9f1d7f62..df958d1359eb1f984fa5514cfb259a4f52634f7c 100644 --- a/verarbeitung/update_graph_del.py +++ b/verarbeitung/update_graph/update_graph_del.py @@ -15,10 +15,9 @@ __status__ = "Production" import sys from pathlib import Path -sys.path.append("../") +sys.path.append("../../") -from input.interface import InputInterface as Input -from Kanten_Vergleich import back_to_valid_edges +from .Kanten_Vergleich import back_to_valid_edges def delete_ref_nodes_rec(pub):