diff --git a/README.md b/README.md index d3a7e438ee56b341db5d7a1eabcb996e154f103f..c109cc8df99fda480c5ac4b666d258c53c6e46c8 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,47 @@ -# Projekt CiS-Biochemie 2021-22 UI +# Projekt CiS-Biochemie 2021-22 -# Benötigt: -- Dash -- Pandas -- beautifulsoup4 -- requests +# Requirements: + +- Alle requirements sind in der 'requirements' Datei im Hauptverzeichnis zu + finden. +- Installation: pip3 install -r requirements # Starten des Programms: -Ausführen von citation_parser_ui.py und einfügen des entstandenen Liks in einen Browser. -Danach müsste sich die Benutzeroberfläche im Browser öffnen. +Um das Programm nutzen zu können, muss zuerst "citation_parser_ui.py" ausgeführt werden und der entstandene Link in einen Browser eingefügt werden. Danach öffnet sich die Benutzeroberfläche im Browser. # Übersicht der Benutzeroberfläche: - Show Info: Durch wiederholtes klicken kann das Fenster ein und aus geblendet werden. -- Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier") +- Input: Die Eingabe erfolgt in Form eines DOI ("Digital Object Identifier") oder Hyperlink + +- Drag and drop or click to select a file to upload: Mehrere DOI oder Hyperlinks in einem .txt-Dokument (genau ein Link pro Zeile). -- Drag and drop or click to select a file to upload: Mehrere DOI in einem txt-Dokument (genau ein DOI pro Zeile). +- Reference Depth: die Tiefe der Artikel, welche von der Eingabe zitiert werden. -- Recursion: die beiden noch unbeschrifteten Felder rechts neben Input sind für die Rekursionstiefen in beide Richtungen +- Cited-by Depth: die Tiefe derjenigen Artikel, welche die eingegebenen Artikel zitieren. 
- Clear All: alle Eingaben werden gelöscht - Clear Selected: alle markierten Eingaben werden gelöscht -- Generate Graph: generiert den zugehörigen Graphen (generiert momentan nur einen string) +- Generate Graph: generiert den zugehörigen Graphen - Update Automatically: automatische Aktualisierung des Graphen bei jeder neuen Eingabe -- Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit zudem wird nicht mehr der DOI angezeigt sondern: +- Smart Input: direkte Überprüfung der Eingabe auf Richtigkeit zudem wird nicht mehr der DOI oder Hyperlink angezeigt sondern: Der Autor, Das Journal, Das Veröffentlichungsdatum. (muss vor Hinzufügen aktiviert worden sein) ## Autoren - Isabelle Siebels - Sebastian David +- Florian Jochens +- Julius Schenk +- Samuel Ockenden +- Alina Molkentin +- Donna Löding +- Malte Schokolowski +- Katja Ehlers +- Merle Stahl diff --git a/citation_parser_main.py b/citation_parser_main.py index 342ac6cb9ec77e204f46d8ccac8425ff8d46e1fd..988e57206cd426be825cb6c50a4157755501597c 100644 --- a/citation_parser_main.py +++ b/citation_parser_main.py @@ -15,14 +15,14 @@ import plotly.express as px import dash_bootstrap_components as dbc # pip install dash-bootstrap-components - -app = dash.Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB]) #SPACELAB https://bootswatch.com/default/ for more themes) +#SPACELAB is added as a style by https://bootswatch.com/default/ +app = dash.Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB]) # List of options when inputting data and generating the graph additional_options = ['Update Automatically','Smart Input'] # Reads the contents of info_box.txt. -# They can later be displayed by pressing the corresponding button. +# They can later be displayed by pressing the corresponding "Show Info" button. 
f = open('info_box.txt', 'r') boxcontent = f.read() f.close() @@ -30,6 +30,7 @@ f.close() app.layout = html.Div([ html.Div(children=[ # Layer 0: For the Header and Help Function(s) + #Show Info button for showing Informations dbc.Button( 'Show Info', id='collapse-button', @@ -37,6 +38,7 @@ html.Div(children=[ color="primary", n_clicks=0, ), + #state is set to be collapsed at first dbc.Collapse( dbc.Card(dbc.CardBody(html.Div(boxcontent, style={'whiteSpace': 'pre-line', 'font-size': '10px'}))), id='collapse', @@ -72,14 +74,14 @@ html.Div(children=[ #Drag and drop or click to select a file to upload ["Drag and drop"]), style={ - "width": "360px", + "width": "400px", "height": "60px", "lineHeight": "60px", "borderWidth": "1px", "borderStyle": "dashed", "borderRadius": "5px", "textAlign": "center", - "margin": "10px" + "margin": "10px", }),size="lg", color="primary", type="border", fullscreen=True,), ]), @@ -244,10 +246,12 @@ def update_input_checklist(input_value,btn1,btn2,filecontents,all_inputs, ) def toggle_collapse(n, is_open): ''' - This callback shows and hides the (first) info-box by, checking how# often - the button has been pressed. The text was loaded at the top. - :param n_clicks: number of times show-info has been clicked. - 'type n_clicks: int + This callback shows and hides the info-box. The text was loaded at the top from an extern .txt. + + :param n: number of times show-info has been clicked. 
+ :type n: int + :param is_open: state of the show-info button + :type is_open: boolean ''' if n: return not is_open @@ -280,19 +284,23 @@ def generate_output(n_clicks,all_inputs,forward_depth,backward_depth,additional_ :type additional_options: list of strings ''' changed_id = [p['prop_id'] for p in callback_context.triggered][0] - if n_clicks is None: - raise PreventUpdate - elif 'Update Automatically' in additional_options \ + + # This branch is entered when 'Generate Graph' is pushed or 'Update Automatically' is selected + # and the checklist or recursion depths are changed. + if 'Update Automatically' in additional_options \ or 'start-button' in changed_id: input_links = [x['value'] for x in all_inputs] + # Processing only returns a potential error message. The data for the graph is simply saved + # in the specified file. errors = Processing(input_links,int(forward_depth),int(backward_depth),'assets/json_text.json') if errors: + # This construct is needed because dash removes white spaces and newlines when concatenating + # strings with '\n'.join(...) or similar methods. message = ['The following inputs are invalid and were not used:'] for error in errors: message.append(html.Br()) message.append(error) message = html.P(message) - #message = [html.P(error) for error in errors] return message if __name__ == '__main__': diff --git a/info_box.txt b/info_box.txt index 2fac1417332f1bac73e3c108d2da5ddbb62d7fd4..f800bf817347efa66036ab64be73f19d7250dfc7 100644 --- a/info_box.txt +++ b/info_box.txt @@ -12,3 +12,4 @@ By entering a DOI ("Digital Object Identifier") or hyperlink to a scientific art Smart Input: Direct verification for correctness of the input. In addition, the DOI is no longer displayed but: The Author, The Journal, The Publication Date. 
(must be activated before adding the DOI) + diff --git a/input/get/journal_fetcher.py b/input/get/journal_fetcher.py index 03ce6324dc38aae05bbabbb6512183e498f9d7a3..3db36bf8fe0aaa505352d13367304b5407decf02 100755 --- a/input/get/journal_fetcher.py +++ b/input/get/journal_fetcher.py @@ -9,7 +9,6 @@ from bs4 import BeautifulSoup import requests from input.publication import Publication - class JournalFetcher(metaclass=ABCMeta): """ This is a abstract-class for fetcher modules. @@ -118,4 +117,4 @@ class JournalFetcher(metaclass=ABCMeta): ,"Nucleic Acids Res.":"Nucleic Acids Research" ,"Build. Environ.":"Building and Environment" ,"Sci. Rep.":"Scientific Reports" - } \ No newline at end of file + } diff --git a/input/requirements.txt b/input/requirements.txt deleted file mode 100644 index a151126691e7f0a9f1c824e9cbac243a96b32e71..0000000000000000000000000000000000000000 --- a/input/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -beautifulsoup4 -requests \ No newline at end of file diff --git a/requirements b/requirements index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..08df71982af3b87747038cc9387723b3ef9dfb5c 100644 --- a/requirements +++ b/requirements @@ -0,0 +1,6 @@ +requests +beautifulsoup4 +plotly +dash +pandas +dash-bootstrap-components diff --git a/verarbeitung/construct_new_graph/export_to_json.py b/verarbeitung/construct_new_graph/export_to_json.py index 0aaf996b8cf0aace22b0d4c20de65148ccacbaa4..d093f63f43cfe2b3c81e2c87ad3b15b195ef934c 100644 --- a/verarbeitung/construct_new_graph/export_to_json.py +++ b/verarbeitung/construct_new_graph/export_to_json.py @@ -61,7 +61,7 @@ def format_edges(edges): return list_of_edge_dicts -def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False): +def output_to_json(nodes, edges, search_depth, search_height, json_file = 'json_text.json', test_var = False): ''' :param nodes: list of publications to export to json :type nodes: List[Publication] @@ -79,6 +79,7 @@ def output_to_json(nodes, edges, json_file = 
'json_text.json', test_var = False) list_of_edge_dicts = format_edges(edges) dict_of_all["nodes"] = list_of_node_dicts dict_of_all["links"] = list_of_edge_dicts + dict_of_all["depth_height"] = [search_depth, search_height] if (test_var): if json_file != 'json_text.json': with open(json_file,'w') as outfile: diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index 1368f6ea92e4b030b9a37fdbd7fb5ff2573968b2..4615d5eb67914b8dd81e050d998bb1c61fd8ac33 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -85,6 +85,7 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t for node in nodes: #iterates over every node in the set of nodes if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set not_in_nodes = False #false --> node will not be created + node.group = 0 break if (not_in_nodes): #there is no node with this doi in the set nodes.append(pub) #appends Publication Object @@ -106,10 +107,24 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t -def complete_inner_edges(): +def complete_inner_edges(update_var = False, input_nodes = [], input_edges = []): ''' + :param update_var: variable to check if call is from update_graph with known nodes and edges or fresh construction + :type update_var: boolean + + :param input_nodes: list of publications from update_graph + :type input_nodes: List[Publication] + + :param input_edges: list of links from update_graph + :type input_edges: List[List[String,String]] + completes inner edges between nodes of group height and depth ''' + if update_var: + global nodes, edges + + nodes = input_nodes + edges = input_edges for node in nodes: if (node.group < 0): @@ -138,14 +153,14 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va :param test_var: variable to differenciate 
between test and url call :type test_var: boolean - :param test_var: variable to check if call is from update_graph with known nodes and edges or fresh construction - :type test_var: boolean + :param update_var: variable to check if call is from update_graph with known nodes and edges or fresh construction + :type update_var: boolean :param input_nodes: list of publications from update_graph :type input_nodes: List[Publication] - :param input_nodes: list of links from update_graph - :type input_nodes: List[List[String,String]] + :param input_edges: list of links from update_graph + :type input_edges: List[List[String,String]] main function to start graph generation ''' diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 183e7647e90c750badbd216f3ec62d9935c79e0c..f77fc38c5d062f2c4eb808b2180e4256c493681b 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. 
Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. 
Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, 
{"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": 
"doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}], "depth_height": ["new_height.json", true]} \ No newline at end of file diff --git a/verarbeitung/new_height.json b/verarbeitung/new_height.json index 6d4d75d2064919529bc5ea85e2e9b9d5609ed1b4..0cc59401969ce8a984fee556a67099167f2144a6 100644 --- a/verarbeitung/new_height.json +++ b/verarbeitung/new_height.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", 
"depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": 
"journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": 
[{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}], "depth_height": [2, 2]} \ No newline at end of file diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py index 78df2dbf3f0a3c4f84a27620a523048d9e57de27..581c4260c6603efdb4fa6d800b086872f90a09f5 100644 --- a/verarbeitung/process_main.py +++ b/verarbeitung/process_main.py @@ -50,7 +50,7 @@ def Processing(url_list, search_depth, search_height, json_file = 'json_text.jso nodes, edges, error_doi_list = init_graph_construction(url_list, search_depth, search_height) # exports graph to given json file name - output_to_json(nodes, edges, json_file) + output_to_json(nodes, edges, search_depth, search_height, json_file) return error_doi_list \ No newline at end of file diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py index 43e9bcc1310e4060f1b2ecba979bb419fdda18ab..92295eefbd363053762a3d6df99d028af44151b1 100644 --- a/verarbeitung/start_script.py +++ b/verarbeitung/start_script.py @@ -6,16 +6,17 @@ from verarbeitung.process_main import Processing doi_list = [] -#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') 
#doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') #doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.6b00709') #doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728') #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.chemrestox.0c00006')# -doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728') -doi_list.append('https://doi.org/10.1021/acs.jpclett.1c03335 ') -error_list = Processing(doi_list, 2, 2, 'test728.json') +#doi_list.append('https://doi.org/10.1021/acs.chemrev.8b00728') +#doi_list.append('https://doi.org/10.1021/acs.jpclett.1c03335 ') +doi_list.append('https://doi.org/10.1021/acs.chemrestox.5b00481') +error_list = Processing(doi_list, 2, 2, 'test481.json') print(error_list) del doi_list diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py index 77746ffeb0ee3aa5375020a4cf8d5aa218520821..62504e513e70b23abffd487cfa6f48f22452e2fb 100644 --- a/verarbeitung/test/update_graph_unittest.py +++ b/verarbeitung/test/update_graph_unittest.py @@ -23,7 +23,7 @@ class UpdatingTest(unittest.TestCase): def test_deleted_input_dois(self): nodes_old_single, edges_old_single, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) nodes_old_both, edges_old_both, err_list = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) - output_to_json(nodes_old_both, edges_old_both, test_var=True) + output_to_json(nodes_old_both, edges_old_both, 2, 2, test_var=True) nodes_new_single, edges_new_single, err_list = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) self.assertCountEqual(nodes_old_single,nodes_new_single) self.assertCountEqual(edges_old_single, edges_new_single) @@ -37,13 +37,13 @@ class UpdatingTest(unittest.TestCase): nodes_height_1, edges_height_1, err_list = init_graph_construction(['doi_lg_1_i'],2,1,True) nodes_height_2, edges_height_2, err_list = 
init_graph_construction(['doi_lg_1_i'],2,2,True) - output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) + output_to_json(nodes_height_2, edges_height_2, 2, 2, 'new_height.json', True) nodes_new_height_1, edges_new_height_1, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) self.assertCountEqual(nodes_height_1, nodes_new_height_1) self.assertCountEqual(edges_height_1, edges_new_height_1) nodes_height_2, edges_height_2, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) - output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) + output_to_json(nodes_height_2, edges_height_2, 2, 2, 'new_height.json', True) nodes_new_height_0, edges_new_height_0, err_list = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) self.assertCountEqual(nodes_height_0, nodes_new_height_0) self.assertCountEqual(edges_height_0, edges_new_height_0) @@ -51,7 +51,7 @@ class UpdatingTest(unittest.TestCase): def test_ref_to_input(self): nodes, edges, err_list = init_graph_construction(['doi_cg_i'], 2, 2, True) nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11'], 2, 2, True) - output_to_json(nodes, edges, 'ref_to_input.json') + output_to_json(nodes, edges, 2, 2, 'ref_to_input.json') new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11'], 'ref_to_input.json', 2, 2, True) self.assertCountEqual(new_nodes, nodes_2) self.assertCountEqual(new_edges, edges_2) @@ -59,12 +59,12 @@ class UpdatingTest(unittest.TestCase): nodes, edges, err_list = init_graph_construction(['doi_cg_i','doi_lg_2_i'], 2, 2, True) nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_d11','doi_lg_2_i'], 2, 2, True) - output_to_json(nodes, edges, 'ref_to_input.json') + output_to_json(nodes, edges, 2, 2, 'ref_to_input.json') new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) self.assertCountEqual(new_nodes, nodes_2) self.assertCountEqual(new_edges, edges_2) - 
output_to_json(nodes_2, edges_2, 'ref_to_input.json') + output_to_json(nodes_2, edges_2, 2, 2, 'ref_to_input.json') new_nodes, new_edges, new_err_list = update_graph(['doi_cg_d11','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) self.assertCountEqual(new_nodes, nodes_2) self.assertCountEqual(new_edges, edges_2) @@ -72,7 +72,7 @@ class UpdatingTest(unittest.TestCase): nodes, edges, err_list = init_graph_construction(['doi_cg_i','doi_lg_2_i'], 2, 2, True) nodes_2, edges_2, err_list_2 = init_graph_construction(['doi_cg_i','doi_lg_2_h11','doi_lg_1_i'], 3, 3, True) - output_to_json(nodes_2, edges_2, 'ref_to_input.json') + output_to_json(nodes_2, edges_2, 2, 2, 'ref_to_input.json') new_nodes, new_edges, new_err_list = update_graph(['doi_cg_i','doi_lg_2_i'], 'ref_to_input.json', 2, 2, True) self.assertCountEqual(new_nodes, nodes) self.assertCountEqual(new_edges, edges) @@ -172,8 +172,8 @@ class UpdatingTest(unittest.TestCase): def test_input_from_json(self): nodes_old, edges_old, err_list = init_graph_construction(['doi_lg_1_i'],2,2,True) - output_to_json(nodes_old, edges_old, test_var = True) - nodes_new, edges_new = input_from_json('test_output.json') + output_to_json(nodes_old, edges_old, 2, 2, test_var = True) + nodes_new, edges_new, old_depth, old_height = input_from_json('test_output.json') self.assertCountEqual(nodes_old,nodes_new) self.assertCountEqual(edges_old, edges_new) @@ -220,7 +220,7 @@ class UpdatingTest(unittest.TestCase): pub_cg_d11 = input_test_func('doi_cg_d12') pub_cg_d11.group = -1 pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_cg_i, pub_cg_d11, pub_cg_d12, pub_cg_h11] - usable_nodes = search_ref_cit_graph_rec_test(pubs, [pub_cg_i], "Citation") + usable_nodes = search_ref_cit_graph_rec_test(pubs, [pub_cg_i], 2, "Citation") self.assertCountEqual(usable_nodes, [pub_cg_h11, pub_lg_2_h11, pub_lg_2_h21, pub_lg_2_h22]) # compare_old_and_new_node_lists.py: @@ -257,11 +257,11 @@ class 
UpdatingTest(unittest.TestCase): pub_lg_2_d22.group = -2 pubs = [pub_lg_2_i, pub_lg_2_h11, pub_lg_2_h12, pub_lg_2_d11, pub_lg_2_d12, pub_lg_2_h21, pub_lg_2_h22, pub_lg_2_d21, pub_lg_2_d22] - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 1, "Citation"),[pub_lg_2_h11]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, 2, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_h11, 2, 1, "Citation"),[pub_lg_2_h21, pub_lg_2_h22]) - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) - self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 1, "Reference"),[pub_lg_2_d11]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, 2, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) + self.assertCountEqual(find_furthermost_citations_test(pubs, [], pub_lg_2_d11, 2, 1, "Reference"),[pub_lg_2_d21, pub_lg_2_i]) def test_complete_changed_group_nodes(self): diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json index 6d4d75d2064919529bc5ea85e2e9b9d5609ed1b4..0cc59401969ce8a984fee556a67099167f2144a6 100644 --- a/verarbeitung/test_output.json +++ b/verarbeitung/test_output.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": 
"date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", 
"target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "abstract": null, "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "abstract": null, "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "abstract": null, "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "abstract": null, "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", 
"abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "abstract": null, "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "abstract": null, "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}], "depth_height": [2, 2]} \ No newline at end of file diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index f4d2ef93ac7ec3c74b079dc415b67c158cde5c57..7dcad9b979fa36a221ee48c5ca4ac4e8da243161 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ 
b/verarbeitung/update_graph/connect_new_input.py @@ -88,7 +88,7 @@ def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new new_citations.append(new_cit_node) # change height accordingly and add link to edge - new_cit_node.group = node.group + depth + 1 + new_cit_node.group = node.group + depth if [cit_node.doi_url,cit_node.doi_url] not in new_edges: new_edges.append([new_cit_node.doi_url,cit_node.doi_url]) @@ -104,7 +104,7 @@ def find_furthermost_citations(new_nodes, new_edges, node, old_search_depth, new new_citations.append(new_cit_node) # change height accordingly and add link to edge - new_cit_node.group = node.group - depth - 1 + new_cit_node.group = node.group + depth if [cit_node.doi_url, new_cit_node.doi_url] not in new_edges: new_edges.append([cit_node.doi_url, new_cit_node.doi_url]) @@ -241,9 +241,10 @@ def connect_old_and_new_input(input_nodes_list, input_edges_list, inserted_nodes # function call to begin recursive processing up to max depth/height for unhandled nodes - nodes, edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = new_nodes, input_edges = new_edges) - for err_node in error_doi_list_new: - if err_node not in error_doi_list: - error_doi_list.append(err_node) + if len(not_handled_inserted_nodes) > 0: + new_nodes, new_edges, error_doi_list_new = init_graph_construction(not_handled_inserted_nodes, new_search_depth, new_search_height, test_var = test_var, update_var = True, input_nodes = new_nodes, input_edges = new_edges) + for err_node in error_doi_list_new: + if err_node not in error_doi_list: + error_doi_list.append(err_node) - return(nodes, edges, error_doi_list) \ No newline at end of file + return(new_nodes, new_edges, error_doi_list) \ No newline at end of file diff --git a/verarbeitung/update_graph/delete_nodes_edges.py b/verarbeitung/update_graph/delete_nodes_edges.py index 
205c82e0c814a0901a47011f3ee9e826ae31a8ab..08d0dbb81434cfa2e3aa292283bde4fdbc216564 100644 --- a/verarbeitung/update_graph/delete_nodes_edges.py +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -20,76 +20,100 @@ sys.path.append("../../") from .update_edges import back_to_valid_edges -def search_ref_cit_graph_rec_test(pubs, new_test_input, cit_var): +def search_ref_cit_graph_rec_test(pubs, new_test_input, old_max_depth, cit_var): global usable_nodes, input_obj_list usable_nodes = [] input_obj_list = pubs if cit_var == "Reference": for pub in new_test_input: - search_ref_graph_rec(pub) + search_ref_graph_rec(pub, 1, old_max_depth) elif cit_var == "Citation": for pub in new_test_input: - search_cit_graph_rec(pub) + search_cit_graph_rec(pub, 1, old_max_depth) return usable_nodes -def search_ref_graph_rec(pub): +def search_ref_graph_rec(pub, curr_depth, old_max_depth): ''' :param pub: pub go get appended to usable_nodes :type pub: Publication + + :param curr_depth: current recursion depth + :type curr_depth: int + + :param old_max_depth: old max search depth + :type old_max_depth: int function that appends nodes of group "reference" to list usable_nodes, if they are reachable from input nodes ''' + usable_doi_nodes = [] for reference in pub.references: for ref_pub in input_obj_list: if ((reference.doi_url == ref_pub.doi_url) and (ref_pub not in usable_nodes)): usable_nodes.append(ref_pub) + usable_doi_nodes.append(ref_pub.doi_url) # to find a cyclus and avoid recursion error not_in_citations = True for citation in pub.citations: - if (reference.doi_url == citation.doi_url): + if (reference.doi_url == citation.doi_url and citation.doi_url not in usable_doi_nodes): not_in_citations = False break - if (not_in_citations): - search_ref_graph_rec(ref_pub) + if not_in_citations and curr_depth < old_max_depth: + search_ref_graph_rec(ref_pub, curr_depth + 1, old_max_depth) -def search_cit_graph_rec(pub): +def search_cit_graph_rec(pub, curr_height, old_max_height): ''' - 
:param pub: pub go get appended to usable_nodes - :type pub: Publication + :param pub: pub to get appended to usable_nodes + :type pub: Publication + + :param curr_height: current recursion height + :type curr_height: int + + :param old_max_height: old max search height + :type old_max_height: int function that appends nodes of group "citation" to list usable_nodes, if they are reachable from input nodes ''' + + usable_doi_nodes = [] for citation in pub.citations: for cit_pub in input_obj_list: - if ((citation.doi_url == cit_pub.doi_url) and (cit_pub not in usable_nodes)): - usable_nodes.append(cit_pub) + if ((citation.doi_url == cit_pub.doi_url)): + if cit_pub not in usable_nodes: + usable_nodes.append(cit_pub) + usable_doi_nodes.append(cit_pub.doi_url) # to find a cyclus and avoid recursion error not_in_references = True for reference in pub.references: - if (citation.doi_url == reference.doi_url): + if (citation.doi_url == reference.doi_url and reference.doi_url not in usable_doi_nodes): not_in_references = False break - if (not_in_references): - search_cit_graph_rec(cit_pub) + if not_in_references and curr_height < old_max_height: + search_cit_graph_rec(cit_pub,curr_height + 1, old_max_height) -def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): +def delete_nodes_and_edges(input_list, common_nodes, old_edges_list, old_depth, old_height): ''' :param input_list: list of publications to get reduced :type input_list: List[Publication] - :param common_nodes: list of input dois which are in old and new input call - :type common_nodes: List[String] + :param common_nodes: list of input dois which are in old and new input call + :type common_nodes: List[String] + + :param old_edges_list: list of links between publications from old call + :type old_edges_list: List[List[String,String]] + + :param old_depth: old max search depth + :type old_depth: int - :param old_edges_list: list of links between publications from old call - :type old_edges_list: 
List[List[String,String]] + :param old_height: old max search height + :type old_height: int function to start recursive node removal for references and citations and to change edge list to valid state ''' @@ -102,8 +126,8 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): for pub in input_obj_list: if (common == pub.doi_url): usable_nodes.append(pub) - search_ref_graph_rec(pub) - search_cit_graph_rec(pub) + search_ref_graph_rec(pub, 1, old_depth) + search_cit_graph_rec(pub, 1, old_height) back_to_valid_edges(old_edges_list, usable_nodes) diff --git a/verarbeitung/update_graph/import_from_json.py b/verarbeitung/update_graph/import_from_json.py index 92d9b02e2c225eaf2a5cd2c3607f080ee9c231a9..72217d5da3a46e368272cc46a176bc14557320ba 100644 --- a/verarbeitung/update_graph/import_from_json.py +++ b/verarbeitung/update_graph/import_from_json.py @@ -86,6 +86,9 @@ def input_from_json(json_file): # adds references and citations to publications and creates edges add_ref_and_cit_to_pubs(input_dict) + old_depth_height = input_dict["depth_height"] + old_depth = old_depth_height[0] + old_height = old_depth_height[1] - return(list_of_nodes_py, list_of_edges_py) \ No newline at end of file + return(list_of_nodes_py, list_of_edges_py, old_depth, old_height) \ No newline at end of file diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py index 1d99d5a06cbde524d6ea5f59fa81ed0ba09f7aeb..f843bb5263608b09188559bd9e4c5f6cc6924cc4 100644 --- a/verarbeitung/update_graph/update_depth.py +++ b/verarbeitung/update_graph/update_depth.py @@ -93,20 +93,6 @@ def reduce_max_depth(max_depth): processed_input_list.remove(pub) - -def get_old_height_depth(): - ''' - function to get old max height and max depth from previous construction call - ''' - max_height = 0 - max_depth = 0 - for pub in processed_input_list: - if (pub.group < 0): - max_depth = max(max_depth, abs(pub.group)) - if (pub.group > 0): - max_height = max(max_height, 
pub.group) - return(max_height, max_depth) - def get_old_max_references(old_depth, test_var): ''' :param old_depth: old maximum depth to search for citations @@ -141,7 +127,7 @@ def get_old_max_citations(old_height, test_var): old_max_citations.append(pub) return(old_max_citations) -def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): +def update_depth(obj_input_list, input_edges, new_depth, new_height, old_depth, old_height, test_var): ''' :param obj_input_list: input list of publications of type Publication from update_graph :type obj_input_list: List[Publication] @@ -165,8 +151,6 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): processed_input_list = obj_input_list valid_edges = input_edges - old_height, old_depth = get_old_height_depth() - # removes publications and links from recursion levels which aren't needed anymore or adds new ones if (old_depth > new_depth): reduce_max_depth(new_depth) @@ -187,8 +171,6 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): # adds edges between reference group and citation group of known publications - return(old_depth, old_height) - diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 1e74773c68d5cd0af8127579a4d11362ef02841e..8143ab4a02aaa361e6fe33e8913476f7752f4e4b 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -16,9 +16,11 @@ __status__ = "Production" import sys + sys.path.append("../../") from input.publication import Publication +from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges from verarbeitung.get_pub_from_input import get_pub from .compare_old_and_new_node_lists import compare_old_and_new_node_lists from .delete_nodes_edges import delete_nodes_and_edges @@ -85,7 +87,7 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes ''' # gets information from 
previous cunstruction call - old_obj_input_list , old_edges_list = input_from_json(json_file) + old_obj_input_list , old_edges_list, old_search_depth, old_search_height = input_from_json(json_file) # one global list to save the process of removing unneeded publications and one to save valid edges global processed_list, valid_edges, error_doi_list @@ -106,11 +108,11 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes processed_list_copy = processed_list.copy() valid_edges_copy = valid_edges.copy() - old_search_depth, old_search_height = update_depth(processed_list, valid_edges, search_depth, search_height, test_var) + update_depth(processed_list, valid_edges, search_depth, search_height, old_search_depth, old_search_height, test_var) # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): - processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges) + processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, valid_edges, old_search_depth, old_search_height) if (len(inserted_nodes) > 0): @@ -134,4 +136,6 @@ def update_graph(new_doi_input_list, json_file, search_depth, search_height, tes if inserted_edge not in valid_edges: valid_edges.append(inserted_edge) + complete_inner_edges(True, processed_list, valid_edges) + return(processed_list, valid_edges, error_doi_list)