diff --git a/verarbeitung/start_script.py b/start_script.py similarity index 92% rename from verarbeitung/start_script.py rename to start_script.py index c93aa4744e28857cb7e5bb4421d6f3e47eb44dae..c73894eae2f99c4b306c66772e847789167ea41b 100644 --- a/verarbeitung/start_script.py +++ b/start_script.py @@ -9,4 +9,4 @@ doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') -Processing(doi_list, 2, 2, 'test.json') \ No newline at end of file +Processing(doi_list, 3, 2, 'test.json') \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index a00d1f6fc200dd30493075561833079fca9b65df..21a2c7e9f9580a17d443e19b49c6f8bafd038e1d 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -32,10 +32,12 @@ def get_cit_type_list(pub, cit_type): function to create nodes and edges and call create_graph_structure_citations ''' - if (cit_type == "Citation"): + if cit_type == "Citation": return(pub.citations) - else: + elif cit_type == "Reference": return(pub.references) + else: + return(ValueError) def create_global_lists_cit(input_nodes, input_edges, pub, search_depth, search_depth_max, cit_type, test_var): ''' diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index b6bdeee5ccbcab6a2f4e1e0ee668fdfe1edf7f0c..89825a8018a0f4ace87a2f04c834d527605c4b57 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -15,6 +15,7 @@ __status__ = "Production" import sys +import gc from pathlib import Path from os import error sys.path.append("../") @@ -24,6 +25,43 @@ from verarbeitung.get_pub_from_input import get_pub from .export_to_json import output_to_json from .add_citations_rec import add_citations, create_global_lists_cit +def initialize_nodes_list_test(doi_input_list, search_depth_max, search_height_max, test_var): + ''' + :param doi_input_list: input list of doi from UI + :type doi_input_list: List[String] + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean + + for unit test purposes only + ''' + global nodes, edges + nodes = [] + edges = [] + return(initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var)) + +def complete_inner_edges_test(test_nodes, test_edges): + ''' + :param test_nodes: list of publications from unit test + :type test_nodes: List[Publication] + + :param test_nodes: list of links from unit test + :type test_nodes: List[List[String,String]] + + for unit test purposes only + ''' + + global nodes, edges + nodes = test_nodes + edges = test_edges + complete_inner_edges() + return(nodes, edges) def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): ''' @@ -54,6 +92,7 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t # checks if publication already exists in nodes not_in_nodes = True #boolean value to check if a node already exists in the set of nodes + for node in nodes: #iterates over every node in the set of nodes if (pub.doi_url == node.doi_url): #determines that a node with this doi already is in the set not_in_nodes = False #false --> node will not be created @@ -110,6 +149,15 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va :param test_var: variable to differenciate between test and url call :type test_var: boolean + :param test_var: variable to check if call is from update_graph with known nodes and edges or fresh construction + :type test_var: boolean + + :param input_nodes: list of publications from update_graph + :type input_nodes: List[Publication] + + :param input_nodes: list of links from update_graph + :type input_nodes: List[List[String,String]] + main function to start graph generation ''' @@ -145,7 +193,11 @@ def init_graph_construction(doi_input_list, search_depth, search_height, test_va # adds edges between reference group and citation group of known publications complete_inner_edges() - # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes, edges, test_var = test_var) + # garbage collection to delete nodes and edges lists. Needed because python keeps lists after function end till next function call + new_nodes = nodes.copy() + new_edges = edges.copy() + del nodes + del edges + gc.collect() - return(nodes,edges) + return(new_nodes,new_edges) diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index aeb1ae04b7931c7e81e6ea73efcc52fd41dc20b0..183e7647e90c750badbd216f3ec62d9935c79e0c 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00203", "name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 1}, {"doi": "https://doi.org/10.1021/acs.jctc.0c01006", "name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 14}, {"doi": "https://doi.org/10.1021/acs.jctc.9b00557", "name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 9}, {"doi": "https://doi.org/10.1021/ci300604z", "name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jctc.5b00834", "name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 48}, {"doi": "https://doi.org/10.1021/ci700036j", "name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 32}, {"doi": "https://doi.org/10.1021/ci500209e", "name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jm2005145", "name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300399w", "name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp9723574", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp972358w", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00675", "name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 26}, {"doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 12}, {"doi": "https://doi.org/10.1021/jm300687e", "name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.8b00312", "name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 74}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00778", "name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 66}, {"doi": "https://doi.org/10.1021/jm0306430", "name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 97}, {"doi": "https://doi.org/10.1021/jm020406h", "name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300493w", "name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 99}, {"doi": "https://doi.org/10.1021/jm049314d", "name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acsomega.1c04320", "name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpcb.1c08383", "name": "Molecular Simulations of Aqueous Electrolytes: Role of Explicit Inclusion of Charge Transfer into Force Fields", "author": ["Max L. Berkowitz"], "year": "November 22, 2021", "journal": "Journal of Physical Chemistry B", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpca.1c06264", "name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00226", "name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "group": "Citedby", "depth": 2, "citations": 11}, {"doi": "https://doi.org/10.1021/acs.est.9b06379", "name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "group": "Citedby", "depth": 2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci049714+", "name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci3001277", "name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.5b00559", "name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci7004498", "name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm020155c", "name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm9602928", "name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci025599w", "name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. Sci.", "group": "Reference", "depth": -2, "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jpcb.1c08383", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}]} \ No newline at end of file diff --git a/verarbeitung/test/construct_graph_unittest.py b/verarbeitung/test/construct_graph_unittest.py index 383f306da2a0db297882e64abc7e00e2c8b1afbc..4b1dabe2c86bb018278ac81f82bac2a78927d286 100644 --- a/verarbeitung/test/construct_graph_unittest.py +++ b/verarbeitung/test/construct_graph_unittest.py @@ -1,23 +1,25 @@ import unittest -import sys -from pathlib import Path +import sys + sys.path.append("../") -from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction, initialize_nodes_list +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction, initialize_nodes_list_test, complete_inner_edges_test +from verarbeitung.construct_new_graph.add_citations_rec import get_cit_type_list +from verarbeitung.test.input_test import input_test_func class ConstructionTest(unittest.TestCase): maxDiff = None def testCycle(self): - nodes, edges = init_graph_construction(['doiz1'],1,1,True) + nodes, edges = init_graph_construction(['doiz1'],1,1,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz1', 'doiz2'], ['doiz2', 'doiz1']]) - nodes, edges = init_graph_construction(['doiz1'],2,2,True) + nodes, edges = init_graph_construction(['doiz1'],2,2,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes, ['doiz1', 'doiz2']) self.assertCountEqual(edges, [['doiz2', 'doiz1'], ['doiz1', 'doiz2']]) @@ -29,56 +31,56 @@ class ConstructionTest(unittest.TestCase): #def testEmptyDepth(self): def testEmptyDepthHeight(self): - nodes, edges = init_graph_construction(['doi1'],0,0,True) + nodes, edges = init_graph_construction(['doi1'],0,0,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi1']) self.assertCountEqual(edges, []) - nodes, edges = init_graph_construction(['doi1', 'doi2'],0,0,True) + nodes, edges = init_graph_construction(['doi1', 'doi2'],0,0,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes, ['doi1','doi2']) self.assertCountEqual(edges, [['doi1', 'doi2']]) - nodes, edges = init_graph_construction(['doi1', 'doi2', 'doi3'],0,0,True) + nodes, edges = init_graph_construction(['doi1', 'doi2', 'doi3'],0,0,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes, ['doi1','doi2', 'doi3']) self.assertCountEqual(edges, [['doi3', 'doi1'], ['doi1', 'doi2']]) def testInnerEdges(self): - nodes, edges = init_graph_construction(['doi_ie1'],1,1,True) + nodes, edges = init_graph_construction(['doi_ie1'],1,1,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_ie1','doi_ie2','doi_ie3']) self.assertCountEqual(edges,[['doi_ie1','doi_ie2'],['doi_ie3','doi_ie1'],['doi_ie3','doi_ie2']]) def testRightHeight(self): - nodes, edges = init_graph_construction(['doi_h01'],0,1,True) + nodes, edges = init_graph_construction(['doi_h01'],0,1,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_h01']) self.assertCountEqual(edges, []) - nodes, edges = init_graph_construction(['doi_h02'],0,1,True) + nodes, edges = init_graph_construction(['doi_h02'],0,1,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_h02','doi_h1']) self.assertCountEqual(edges, [['doi_h1','doi_h02']]) - nodes, edges = init_graph_construction(['doi_h02'],0,2,True) + nodes, edges = init_graph_construction(['doi_h02'],0,2,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_h02','doi_h1','doi_h2']) self.assertCountEqual(edges, [['doi_h1','doi_h02'], ['doi_h2','doi_h1']]) def testRightDepth(self): - nodes, edges = init_graph_construction(['doi_d01'],1,0,True) + nodes, edges = init_graph_construction(['doi_d01'],1,0,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_d01']) self.assertCountEqual(edges, []) - nodes, edges = init_graph_construction(['doi_d02'],1,0,True) + nodes, edges = init_graph_construction(['doi_d02'],1,0,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_d02','doi_d1']) self.assertCountEqual(edges, [['doi_d02','doi_d1']]) - nodes, edges = init_graph_construction(['doi_d02'],2,0,True) + nodes, edges = init_graph_construction(['doi_d02'],2,0,True,False) doi_nodes = keep_only_dois(nodes) self.assertCountEqual(doi_nodes,['doi_d02','doi_d1','doi_d2']) self.assertCountEqual(edges, [['doi_d02','doi_d1'], ['doi_d1','doi_d2']]) @@ -89,19 +91,54 @@ class ConstructionTest(unittest.TestCase): # initialize_graph.py: def test_initialize_nodes_list(self): - references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list(['doi_lg_1_i','doi_lg_2_i'], 0, 0, True) + references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i','doi_lg_2_i'], 0, 0, True) doi_references = keep_only_dois(references_pub_obj_list) doi_citations = keep_only_dois(citations_pub_obj_list) self.assertCountEqual(doi_references, []) self.assertCountEqual(doi_citations, []) - references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list(['doi_lg_1_i','doi_lg_2_i'], 1, 1, True) + references_pub_obj_list, citations_pub_obj_list = initialize_nodes_list_test(['doi_lg_1_i','doi_lg_2_i'], 1, 1, True) doi_references = keep_only_dois(references_pub_obj_list) doi_citations = keep_only_dois(citations_pub_obj_list) self.assertCountEqual(doi_references, ['doi_lg_1_d11','doi_lg_1_d12','doi_lg_2_d11','doi_lg_2_d12']) self.assertCountEqual(doi_citations, ['doi_lg_1_h11','doi_lg_1_h12','doi_cg_i','doi_lg_2_h11','doi_lg_2_h12']) - + def test_complete_inner_edges(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') + pub_lg_1_h_12.group = 1 + pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') + pub_lg_1_d_12.group = -1 + nodes = [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12] + edges = [['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h12','doi_lg_1_i']] + processed_nodes, processed_edges = complete_inner_edges_test(nodes, edges) + self.assertCountEqual(processed_nodes, [pub_lg_1_i, pub_lg_1_h_12, pub_lg_1_d_12]) + self.assertCountEqual(processed_edges, [['doi_lg_1_i','doi_lg_1_d12'],['doi_lg_1_h12','doi_lg_1_i'],['doi_lg_1_h12','doi_lg_1_d12']]) + + # add_citations_rec.py: + + def test_get_type_list(self): + pub_lg_1_i = input_test_func('doi_lg_1_i') + pub_lg_1_i.group = 0 + self.assertEqual(get_cit_type_list(pub_lg_1_i, "Hallo"), ValueError) + + pub_lg_1_h_12 = input_test_func('doi_lg_1_h12') + pub_lg_1_h_12.group = 1 + pub_lg_1_h_12_refs = get_cit_type_list(pub_lg_1_h_12, "Reference") + pub_lg_1_h_12_cits = get_cit_type_list(pub_lg_1_h_12, "Citation") + self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_refs), keep_only_dois(pub_lg_1_h_12.references)) + self.assertCountEqual(keep_only_dois(pub_lg_1_h_12_cits), keep_only_dois(pub_lg_1_h_12.citations)) + + pub_lg_1_d_12 = input_test_func('doi_lg_1_d12') + pub_lg_1_d_12.group = -1 + pub_lg_1_d_12_refs = get_cit_type_list(pub_lg_1_d_12, "Reference") + pub_lg_1_d_12_cits = get_cit_type_list(pub_lg_1_d_12, "Citation") + self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_refs), keep_only_dois(pub_lg_1_d_12.references)) + self.assertCountEqual(keep_only_dois(pub_lg_1_d_12_cits), keep_only_dois(pub_lg_1_d_12.citations)) + + def test_create_graph_structure_citations(self): + print("Hallo") def keep_only_dois(nodes): diff --git a/verarbeitung/test_output.json b/verarbeitung/test_output.json index f96362a05cea7ad954fa28bfc22074e15e9fa1cd..50d6e78c1960661d4425343dfe7605eeb35d6746 100644 --- a/verarbeitung/test_output.json +++ b/verarbeitung/test_output.json @@ -1 +1 @@ -{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": "title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}]} \ No newline at end of file +{"nodes": [{"doi": "doi_h02", "name": "title_h02", "author": ["contributor_h02"], "year": "date_h02", "journal": "journal_h02", "group": "Input", "depth": 0, "citations": 1}, {"doi": "doi_h1", "name": "title_h1", "author": ["contributor_h1"], "year": "date_h1", "journal": "journal_h1", "group": "Citedby", "depth": 1, "citations": 1}, {"doi": "doi_h2", "name": "title_h2", "author": ["contributor_h2"], "year": "date_h2", "journal": "journal_h2", "group": "Citedby", "depth": 2, "citations": 1}], "links": [{"source": "doi_h1", "target": "doi_h02"}, {"source": "doi_h2", "target": "doi_h1"}]} \ No newline at end of file diff --git a/verarbeitung/update_graph/connect_new_input.py b/verarbeitung/update_graph/connect_new_input.py index a7e7fda3e10932d932436ec8e96036c6380c7b9f..35603414f4d22d0612e5dd02b73498261d458a88 100644 --- a/verarbeitung/update_graph/connect_new_input.py +++ b/verarbeitung/update_graph/connect_new_input.py @@ -19,7 +19,6 @@ from os import error sys.path.append("../") -from .import_from_json import input_from_json from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction from verarbeitung.construct_new_graph.add_citations_rec import add_citations, get_cit_type_list, create_global_lists_cit diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py index e72bb6ae0e56092d9e30e918b5e61228b50d3f27..38560c7d70e9eb57b8bf4ec44a1a9516fcde2b51 100644 --- a/verarbeitung/update_graph/update_depth.py +++ b/verarbeitung/update_graph/update_depth.py @@ -13,7 +13,7 @@ __status__ = "Production" #__version__ = "" #__maintainer__ = "" -import sys +import sys sys.path.append("../../") from verarbeitung.construct_new_graph.add_citations_rec import add_citations @@ -72,7 +72,10 @@ def get_old_max_references(old_depth): old_max_references = [] for pub in processed_input_list: if (abs(pub.group) == old_depth): - old_max_references.append(pub.references) + for reference in pub.references: + for ref_pub in processed_input_list: + if reference.doi_url == ref_pub.doi_url: + old_max_references.append(ref_pub) return(old_max_references) def get_old_max_citations(old_height): @@ -85,7 +88,10 @@ def get_old_max_citations(old_height): old_max_citations = [] for pub in processed_input_list: if (abs(pub.group) == old_height): - old_max_citations.append(pub.citations) + for citation in pub.citations: + for cit_pub in processed_input_list: + if citation.doi_url == cit_pub.doi_url: + old_max_citations.append(cit_pub) return(old_max_citations) def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): @@ -123,10 +129,10 @@ def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var): # adds publications and links for new recursion levels elif (old_depth < new_depth): - old_max_references = get_old_max_references() + old_max_references = get_old_max_references(old_depth) add_citations(processed_input_list, valid_edges, old_max_references, old_depth+1, new_depth, "Reference", test_var) elif (old_height < new_height): - old_max_citations = get_old_max_citations() + old_max_citations = get_old_max_citations(old_height) add_citations(processed_input_list, valid_edges, old_max_citations, old_height+1, new_height, "Citation", test_var) back_to_valid_edges(valid_edges, processed_input_list) diff --git a/verarbeitung/update_graph/update_graph.py b/verarbeitung/update_graph/update_graph.py index 90114c3453e84ed4fc7c2c2d407351309f42969b..9a91f94623426d09da56d055f27a2c5727af139d 100644 --- a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -20,7 +20,6 @@ sys.path.append("../../") from input.publication import Publication from verarbeitung.get_pub_from_input import get_pub -from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction from .Knoten_Vergleich import doi_listen_vergleichen from .delete_nodes_edges import delete_nodes_and_edges from .connect_new_input import connect_old_and_new_input