diff --git a/verarbeitung/.gitignore b/verarbeitung/.gitignore index d167d87e324aad7b0d1509476bf761cf6820b5a2..b604f4fdf854363614dd5eb73cb2ab92a941ac64 100644 --- a/verarbeitung/.gitignore +++ b/verarbeitung/.gitignore @@ -54,4 +54,8 @@ coverage.xml docs/_build/ # PyBuilder -target/ \ No newline at end of file +target/ + +#CodeCounter + +.VSCodeCounter/ \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/add_citations_rec.py b/verarbeitung/construct_new_graph/add_citations_rec.py index 2faab32324fb12a6754dfb0e40891fcfa72c6553..a00d1f6fc200dd30493075561833079fca9b65df 100644 --- a/verarbeitung/construct_new_graph/add_citations_rec.py +++ b/verarbeitung/construct_new_graph/add_citations_rec.py @@ -184,4 +184,4 @@ def add_citations(input_nodes, input_edges, citations_pub_obj_list, search_depth edges = input_edges process_citations_rec(citations_pub_obj_list, search_depth, search_depth_max, cit_type, test_var) - return(nodes, edges) \ No newline at end of file + #return(nodes, edges) \ No newline at end of file diff --git a/verarbeitung/construct_new_graph/export_to_json.py b/verarbeitung/construct_new_graph/export_to_json.py index 22b0590be3dec7734e5ce418d9cbd59c99b41d75..c69a61befcc402f9aee5b2184db56e0ad245618f 100644 --- a/verarbeitung/construct_new_graph/export_to_json.py +++ b/verarbeitung/construct_new_graph/export_to_json.py @@ -60,7 +60,7 @@ def format_edges(edges): return list_of_edge_dicts -def output_to_json(nodes, edges, test_var): +def output_to_json(nodes, edges, json_file = 'json_text.json', test_var = False): ''' :param nodes: list of publications to export to json :type nodes: List[Publication] @@ -78,9 +78,9 @@ def output_to_json(nodes, edges, test_var): list_of_edge_dicts = format_edges(edges) dict_of_all["nodes"] = list_of_node_dicts dict_of_all["links"] = list_of_edge_dicts - if (test_var): + if (test_var and json_file == 'json_text.json'): with open('test_output.json','w') as outfile: json.dump(dict_of_all, outfile) else: - 
with open('json_text.json','w') as outfile: + with open(json_file,'w') as outfile: json.dump(dict_of_all, outfile) diff --git a/verarbeitung/construct_new_graph/initialize_graph.py b/verarbeitung/construct_new_graph/initialize_graph.py index c54880f3adae248588720778f4f97231c8250de0..d1d79f2bac7930eac551ff9faca72ef70e763ce6 100644 --- a/verarbeitung/construct_new_graph/initialize_graph.py +++ b/verarbeitung/construct_new_graph/initialize_graph.py @@ -142,6 +142,6 @@ def init_graph_construction(doi_input_list, search_height, search_depth, test_va complete_inner_edges() # calls a skript to save nodes and edges of graph in .json file - output_to_json(nodes, edges, test_var) + #output_to_json(nodes, edges, test_var) return(nodes,edges) diff --git a/verarbeitung/dev_files/print_graph_test.py b/verarbeitung/dev_files/print_graph_test.py index 0ce8cd974324c66b632806cef83f7c958742a587..b45e90956fd3535c08fafa5196c3b2f351985d13 100644 --- a/verarbeitung/dev_files/print_graph_test.py +++ b/verarbeitung/dev_files/print_graph_test.py @@ -20,7 +20,7 @@ import sys sys.path.append("../../") from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction from verarbeitung.update_graph.import_from_json import input_from_json -from verarbeitung.update_graph.update_graph import check_graph_updates +from verarbeitung.update_graph.update_graph import update_graph # a function to print nodes and edges from a graph def print_graph(nodes, edges): @@ -93,7 +93,7 @@ def try_delete_nodes(): # list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') # doi_list = [] # doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - # valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py) + # valid_nodes, valid_edges = update_graph(doi_list, list_of_nodes_py, list_of_edges_py) # print_simple(valid_nodes, valid_edges) def try_import(): diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json index 
183e7647e90c750badbd216f3ec62d9935c79e0c..aeb1ae04b7931c7e81e6ea73efcc52fd41dc20b0 100644 --- a/verarbeitung/json_text.json +++ b/verarbeitung/json_text.json @@ -1 +1 @@ -{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. 
Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}]} \ No newline at end of file +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. 
Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. 
Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00203", "name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 1}, {"doi": "https://doi.org/10.1021/acs.jctc.0c01006", "name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 14}, {"doi": "https://doi.org/10.1021/acs.jctc.9b00557", "name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 9}, {"doi": "https://doi.org/10.1021/ci300604z", "name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jctc.5b00834", "name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 48}, {"doi": "https://doi.org/10.1021/ci700036j", "name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 32}, {"doi": "https://doi.org/10.1021/ci500209e", "name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jm2005145", "name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300399w", "name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp9723574", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp972358w", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00675", "name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 26}, {"doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 12}, {"doi": "https://doi.org/10.1021/jm300687e", "name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.8b00312", "name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 74}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00778", "name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. 
Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 66}, {"doi": "https://doi.org/10.1021/jm0306430", "name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 97}, {"doi": "https://doi.org/10.1021/jm020406h", "name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300493w", "name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 99}, {"doi": "https://doi.org/10.1021/jm049314d", "name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acsomega.1c04320", "name": "Novel Anti-Hepatitis B Virus 
Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpcb.1c08383", "name": "Molecular Simulations of Aqueous Electrolytes: Role of Explicit Inclusion of Charge Transfer into Force Fields", "author": ["Max L. Berkowitz"], "year": "November 22, 2021", "journal": "Journal of Physical Chemistry B", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpca.1c06264", "name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. 
Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00226", "name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "group": "Citedby", "depth": 2, "citations": 11}, {"doi": "https://doi.org/10.1021/acs.est.9b06379", "name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "group": "Citedby", "depth": 2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci049714+", "name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci3001277", "name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. 
Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.5b00559", "name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci7004498", "name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm020155c", "name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm9602928", "name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci025599w", "name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. 
Sci.", "group": "Reference", "depth": -2, "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": 
"https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jpcb.1c08383", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": 
"https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, 
{"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file diff --git a/verarbeitung/new_height.json b/verarbeitung/new_height.json new file mode 100644 index 0000000000000000000000000000000000000000..40a7bc557e428357a6d173f86c5aea4c19b457b3 --- /dev/null +++ b/verarbeitung/new_height.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "doi_lg_1_i", "name": "title_lg_1_i", "author": ["contributor_lg_1_i"], "year": "date_lg_1_i", "journal": "journal_lg_1_i", "group": "Input", "depth": 0, "citations": 2}, {"doi": "doi_lg_1_d11", "name": "title_lg_1_d11", "author": ["contributor_lg_1_d11"], "year": "date_lg_1_d11", "journal": "journal_lg_1_d11", "group": "Reference", "depth": -1, "citations": 1}, {"doi": "doi_lg_1_d12", "name": "title_lg_1_d12", "author": ["contributor_lg_1_d12"], "year": "date_lg_1_d12", "journal": "journal_lg_1_d12", "group": "Reference", "depth": -1, "citations": 2}, {"doi": "doi_lg_1_h11", "name": "title_lg_1_h11", "author": ["contributor_lg_1_h11"], "year": "date_lg_1_h11", "journal": "journal_lg_1_h11", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h12", "name": "title_lg_1_h12", "author": ["contributor_lg_1_h12"], "year": "date_lg_1_h12", "journal": "journal_lg_1_h12", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "doi_lg_1_h21", "name": "title_lg_1_h21", "author": ["contributor_lg_1_h21"], "year": "date_lg_1_h21", "journal": "journal_lg_1_h21", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h22", "name": 
"title_lg_1_h22", "author": ["contributor_lg_1_h22"], "year": "date_lg_1_h22", "journal": "journal_lg_1_h22", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_h23", "name": "title_lg_1_h23", "author": ["contributor_lg_1_h23"], "year": "date_lg_1_h23", "journal": "journal_lg_1_h23", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "doi_lg_1_d21", "name": "title_lg_1_d21", "author": ["contributor_lg_1_d21"], "year": "date_lg_1_d21", "journal": "journal_lg_1_d21", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d22", "name": "title_lg_1_d22", "author": ["contributor_lg_1_d22"], "year": "date_lg_1_d22", "journal": "journal_lg_1_d22", "group": "Reference", "depth": -2, "citations": 2}, {"doi": "doi_lg_1_d23", "name": "title_lg_1_d23", "author": ["contributor_lg_1_d23"], "year": "date_lg_1_d23", "journal": "journal_lg_1_d23", "group": "Reference", "depth": -2, "citations": 2}], "links": [{"source": "doi_lg_1_i", "target": "doi_lg_1_d11"}, {"source": "doi_lg_1_i", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_i"}, {"source": "doi_lg_1_h21", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h11"}, {"source": "doi_lg_1_h22", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_h23", "target": "doi_lg_1_h12"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d11", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d21", "target": "doi_lg_1_d22"}, {"source": "doi_lg_1_d22", "target": "doi_lg_1_d21"}, {"source": "doi_lg_1_d12", "target": "doi_lg_1_d23"}, {"source": "doi_lg_1_h12", "target": "doi_lg_1_d12"}, {"source": "doi_lg_1_h11", "target": "doi_lg_1_h12"}]} \ No newline at end of file diff --git a/verarbeitung/process_main.py b/verarbeitung/process_main.py index d1654c9da7efcc21e5d13136472d28f9827a14b0..4e0678386b6da3ee18d4a0f7b0c271f3167c93fd 100644 --- a/verarbeitung/process_main.py +++ 
b/verarbeitung/process_main.py @@ -1,4 +1,54 @@ +# -*- coding: utf-8 -*- +""" +main function to call to generate a graph representing citations between multiple ACS/Nature journals +""" -def Processing(url): - print(url) \ No newline at end of file +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + + +import sys +from pathlib import Path +from os import error + +sys.path.append("../") + +from verarbeitung.construct_new_graph.export_to_json import output_to_json +from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction +from verarbeitung.update_graph.update_graph import update_graph + +def Processing(url_list, search_depth, search_height, json_file = 'json_text.json'): + ''' + :param url_list: list of urls to construct publication graph for + :type url_list: List[String] + + :param search_depth: maximum depth to search for references + :type search_depth: int + + :param search_height: maximum height to search for citations + :type search_height: int + + :param json_file: file to export graph to + :type json_file: String + + main function to construct new or updated publication graphs + ''' + + # updates graph if json file is known in directory otherwise starts new graph construction + try: + with open(json_file) as f: + nodes, edges = update_graph(url_list, json_file, search_depth, search_height) + + except IOError: + nodes, edges = init_graph_construction(url_list, search_depth, search_height) + + # exports graph to given json file name + output_to_json(nodes, edges, json_file) + \ No newline at end of file diff --git a/verarbeitung/start_script.py b/verarbeitung/start_script.py index 9b46a2e916f228ada296c8578b6483866ae2ad5f..71c0e8dadfc0a736f2e465fd78c56741631dacd2 100644 --- 
a/verarbeitung/start_script.py +++ b/verarbeitung/start_script.py @@ -1,5 +1,11 @@ import sys from pathlib import Path +from verarbeitung.process_main import Processing from verarbeitung.dev_files.print_graph_test import try_known_publications, try_delete_nodes -try_delete_nodes() \ No newline at end of file + +doi_list = [] +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') +#doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') +doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') +Processing(doi_list, 2, 2, 'test.json') \ No newline at end of file diff --git a/verarbeitung/test.json b/verarbeitung/test.json new file mode 100644 index 0000000000000000000000000000000000000000..aeb1ae04b7931c7e81e6ea73efcc52fd41dc20b0 --- /dev/null +++ b/verarbeitung/test.json @@ -0,0 +1 @@ +{"nodes": [{"doi": "https://doi.org/10.1021/acs.jcim.9b00249", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 5}, {"doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. 
Rev.", "group": "Citedby", "depth": 1, "citations": 2}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00741", "name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 1, "citations": 8}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00250", "name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 1, "citations": 12}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00203", "name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. 
Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Input", "depth": 0, "citations": 1}, {"doi": "https://doi.org/10.1021/acs.jctc.0c01006", "name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 14}, {"doi": "https://doi.org/10.1021/acs.jctc.9b00557", "name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 9}, {"doi": "https://doi.org/10.1021/ci300604z", "name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jctc.5b00834", "name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "group": "Reference", "depth": -1, "citations": 48}, {"doi": "https://doi.org/10.1021/ci700036j", "name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 32}, {"doi": "https://doi.org/10.1021/ci500209e", "name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jm2005145", "name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300399w", "name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp9723574", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/jp972358w", "name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.0c00675", "name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 26}, {"doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 12}, {"doi": "https://doi.org/10.1021/jm300687e", "name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.8b00312", "name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 74}, {"doi": "https://doi.org/10.1021/acs.jcim.9b00778", "name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. 
Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 66}, {"doi": "https://doi.org/10.1021/jm0306430", "name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 97}, {"doi": "https://doi.org/10.1021/jm020406h", "name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/ci300493w", "name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -1, "citations": 99}, {"doi": "https://doi.org/10.1021/jm049314d", "name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -1, "citations": 100}, {"doi": "https://doi.org/10.1021/acsomega.1c04320", "name": "Novel Anti-Hepatitis B Virus 
Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "group": "Citedby", "depth": 1, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpcb.1c08383", "name": "Molecular Simulations of Aqueous Electrolytes: Role of Explicit Inclusion of Charge Transfer into Force Fields", "author": ["Max L. Berkowitz"], "year": "November 22, 2021", "journal": "Journal of Physical Chemistry B", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jpca.1c06264", "name": "Topological Characterization and Graph Entropies of Tessellations of Kekulene Structures: Existence of Isentropic Structures and Applications to Thermochemistry, Nuclear Magnetic Resonance, and Electron Spin Resonance", "author": ["S. Ruth Julie Kavitha", "Jessie Abraham", "Micheal Arockiaraj", "Joseph Jency", "Krishnan Balasubramanian"], "year": "September 1, 2021", "journal": "J. Phys. Chem. A", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acsmedchemlett.1c00251", "name": "The Growing Importance of Chirality in 3D Chemical Space Exploration and Modern Drug Discovery Approaches for Hit-ID", "author": ["Ilaria Proietti Silvestri", "Paul J. J. Colbon"], "year": "July 16, 2021", "journal": "ACS Med. Chem. Lett.", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jmedchem.1c00416", "name": "Target-Based Evaluation of \u201cDrug-Like\u201d Properties and Ligand Efficiencies", "author": ["Paul D. Leeson", "A. Patricia Bento", "Anna Gaulton", "Anne Hersey", "Emma J. Manners", "Chris J. Radoux", "Andrew R. 
Leach"], "year": "May 13, 2021", "journal": "Journal of Medicinal Chemistry", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.jcim.1c00226", "name": "Automatic Identification of Lansoprazole Degradants under Stress Conditions by LC-HRMS with MassChemSite and WebChembase", "author": ["Stefano Bonciarelli", "Jenny Desantis", "Laura Goracci", "Lydia Siragusa", "Ismael Zamora", "Elisabeth Ortega-Carrasco"], "year": "June 1, 2021", "journal": "Journal of Chemical Information and Modeling", "group": "Citedby", "depth": 2, "citations": 0}, {"doi": "https://doi.org/10.1021/acs.chemrestox.0c00006", "name": "Computational Approaches to Identify Structural Alerts and Their Applications in Environmental Toxicology and Drug Discovery", "author": ["Hongbin Yang", "Chaofeng Lou", "Weihua Li", "Guixia Liu", "Yun Tang"], "year": "February 24, 2020", "journal": "Chem. Res. Toxicol.", "group": "Citedby", "depth": 2, "citations": 11}, {"doi": "https://doi.org/10.1021/acs.est.9b06379", "name": "Toward a Global Understanding of Chemical Pollution: A First Comprehensive Analysis of National and Regional Chemical Inventories", "author": ["Zhanyun Wang", "Glen W. Walker", "Derek C. G. Muir", "Kakuko Nagatani-Yoshida"], "year": "January 22, 2020", "journal": "Environ. Sci. Technol.", "group": "Citedby", "depth": 2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci049714+", "name": "ZINC \u2212 A Free Database of Commercially Available Compounds for Virtual Screening", "author": ["John J. Irwin", "Brian K. Shoichet"], "year": "December 14, 2004", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci3001277", "name": "ZINC: A Free Tool to Discover Chemistry for Biology", "author": ["John J. Irwin", "Teague Sterling", "Michael M. Mysinger", "Erin S. Bolstad", "Ryan G. 
Coleman"], "year": "May 15, 2012", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/acs.jcim.5b00559", "name": "ZINC 15 \u2013 Ligand Discovery for Everyone", "author": ["Teague Sterling", "John J. Irwin"], "year": "October 19, 2015", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 98}, {"doi": "https://doi.org/10.1021/ci7004498", "name": "Application of Belief Theory to Similarity Data Fusion for Use in Analog Searching and Lead Hopping", "author": ["Steven W. Muchmore", "Derek A. Debe", "James T. Metz", "Scott P. Brown", "Yvonne C. Martin", "Philip J. Hajduk"], "year": "April 17, 2008", "journal": "Journal of Chemical Information and Modeling", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm020155c", "name": "Do Structurally Similar Molecules Have Similar Biological Activity?", "author": ["Yvonne C. Martin", "James L. Kofron", "Linda M. Traphagen"], "year": "August 13, 2002", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/jm9602928", "name": "The Properties of Known Drugs. 1. Molecular Frameworks", "author": ["Guy W. Bemis", "Mark A. Murcko"], "year": "July 19, 1996", "journal": "Journal of Medicinal Chemistry", "group": "Reference", "depth": -2, "citations": 100}, {"doi": "https://doi.org/10.1021/ci025599w", "name": "Molecular Shape Diversity of Combinatorial Libraries:\u2009 A Prerequisite for Broad Bioactivity\u2020", "author": ["Wolfgang H. B. Sauer", "Matthias K. Schwarz"], "year": "March 14, 2003", "journal": "J. Chem. Inf. Comput. 
Sci.", "group": "Reference", "depth": -2, "citations": 99}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": 
"https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jpcb.1c08383", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acs.jpca.1c06264", "target": "https://doi.org/10.1021/acs.chemrev.1c00107"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.jmedchem.1c00416", "target": "https://doi.org/10.1021/acs.jmedchem.0c01332"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00226", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.chemrestox.0c00006", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.est.9b06379", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": 
"https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/acs.jcim.5b00559"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci7004498"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/jm9602928"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00675", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/ci3001277"}, {"source": "https://doi.org/10.1021/acs.jcim.5b00559", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, 
{"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci049714+"}, {"source": "https://doi.org/10.1021/ci7004498", "target": "https://doi.org/10.1021/jm020155c"}, {"source": "https://doi.org/10.1021/acsmedchemlett.1c00251", "target": "https://doi.org/10.1021/ci025599w"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/ci025599w"}]} \ No newline at end of file diff --git a/verarbeitung/test/update_graph_unittest.py b/verarbeitung/test/update_graph_unittest.py index c91efe4610ba54e1e9f9de4d6ceed8764fd9601b..c44de64c5fefc2d3545b662a1f0a42d92c69af5a 100644 --- a/verarbeitung/test/update_graph_unittest.py +++ b/verarbeitung/test/update_graph_unittest.py @@ -2,17 +2,20 @@ import unittest import sys from pathlib import Path + sys.path.append("../") from verarbeitung.construct_new_graph.initialize_graph import init_graph_construction +from verarbeitung.construct_new_graph.export_to_json import output_to_json from verarbeitung.update_graph.import_from_json import input_from_json -from verarbeitung.update_graph.update_graph import check_graph_updates +from verarbeitung.update_graph.update_graph import update_graph class UpdatingTest(unittest.TestCase): maxDiff = None def test_import_from_json(self): nodes_old, edges_old = init_graph_construction(['doi_lg_1_i'],2,2,True) + output_to_json(nodes_old, edges_old, test_var = True) nodes_new, edges_new = input_from_json('test_output.json') self.assertCountEqual(nodes_old,nodes_new) self.assertCountEqual(edges_old, edges_new) @@ -20,8 +23,8 @@ class UpdatingTest(unittest.TestCase): def test_deleted_input_dois(self): nodes_old_single, edges_old_single = init_graph_construction(['doi_lg_1_i'],2,2,True) nodes_old_both, edges_old_both = init_graph_construction(['doi_lg_1_i','doi_lg_2_i'],2,2,True) - nodes_new_both, edges_new_both = 
input_from_json('test_output.json') - nodes_new_single, edges_new_single = check_graph_updates(['doi_lg_1_i'], nodes_old_both, edges_old_both, 'test_output.json', 2, 2, True) + output_to_json(nodes_old_both, edges_old_both, test_var=True) + nodes_new_single, edges_new_single = update_graph(['doi_lg_1_i'], 'test_output.json', 2, 2, True) self.assertCountEqual(nodes_old_single,nodes_new_single) self.assertCountEqual(edges_old_single, edges_new_single) @@ -29,6 +32,29 @@ class UpdatingTest(unittest.TestCase): nodes_old_two, edges_old_two = init_graph_construction(['doi_lg_1_i','doi_cg_i'],3,3,True) nodes_old_three, edges_old_three = init_graph_construction(['doi_lg_1_i','doi_lg_2_i','doi_cg_i'],3,3,True) + def test_new_height(self): + nodes_height_0, edges_height_0 = init_graph_construction(['doi_lg_1_i'],2,0,True) + nodes_height_1, edges_height_1 = init_graph_construction(['doi_lg_1_i'],2,1,True) + nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True) + + output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) + nodes_new_height_1, edges_new_height_1 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 1, True) + self.assertCountEqual(nodes_height_1, nodes_new_height_1) + self.assertCountEqual(edges_height_1, edges_new_height_1) + + nodes_height_2, edges_height_2 = init_graph_construction(['doi_lg_1_i'],2,2,True) + output_to_json(nodes_height_2, edges_height_2, 'new_height.json', True) + nodes_new_height_0, edges_new_height_0 = update_graph(['doi_lg_1_i'], 'new_height.json', 2, 0, True) + self.assertCountEqual(nodes_height_0, nodes_new_height_0) + self.assertCountEqual(edges_height_0, edges_new_height_0) + + + + + + + + def keep_only_dois(nodes): ''' @@ -36,7 +62,7 @@ def keep_only_dois(nodes): :type nodes: List[Publication] gets nodes of type pub and return only their doi - ''' + ''' doi_list = [] for node in nodes: doi_list.append(node.doi_url) diff --git a/verarbeitung/update_graph/update_graph_del.py 
b/verarbeitung/update_graph/delete_nodes_edges.py similarity index 98% rename from verarbeitung/update_graph/update_graph_del.py rename to verarbeitung/update_graph/delete_nodes_edges.py index 5879d96a00981509c226750a61805a36b5955f2c..0e4571a15be9628d15a892629688d70ba5f9abf3 100644 --- a/verarbeitung/update_graph/update_graph_del.py +++ b/verarbeitung/update_graph/delete_nodes_edges.py @@ -78,10 +78,9 @@ def delete_nodes_and_edges(input_list, common_nodes, old_edges_list): function to start recursive node removal for references and citations and to change edge list to valid state ''' - global usable_nodes, input_obj_list usable_nodes = [] - input_obj_list = input_list.copy() + input_obj_list = input_list # starts for every common input node a tree-search and adds found nodes to usable_nodes for common in common_nodes: diff --git a/verarbeitung/update_graph/import_from_json.py b/verarbeitung/update_graph/import_from_json.py index f79e42bfe5343c91f9db0de02a200424305cf77a..92d9b02e2c225eaf2a5cd2c3607f080ee9c231a9 100644 --- a/verarbeitung/update_graph/import_from_json.py +++ b/verarbeitung/update_graph/import_from_json.py @@ -34,7 +34,8 @@ def create_pubs_from_json(input_dict): for node in input_dict["nodes"]: #creates for the nodes the objects class Publication - pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["depth"] ) + pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], []) + pub.group = node["depth"] #appends the objects to a list list_of_nodes_py.append(pub) diff --git a/verarbeitung/update_graph/update_depth.py b/verarbeitung/update_graph/update_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..daf9063295d87a5733a69b32616211658ada81b8 --- /dev/null +++ b/verarbeitung/update_graph/update_depth.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +Functions to update the citation depth of recursive graph construction + +""" + +__authors__ = "Donna 
Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ = "" +#__credits__ = ["", "", "", ""] +#__license__ = "" +#__version__ = "" +#__maintainer__ = "" + +import sys +sys.path.append("../../") + +from verarbeitung.construct_new_graph.add_citations_rec import add_citations +from verarbeitung.construct_new_graph.initialize_graph import complete_inner_edges +from .Kanten_Vergleich import back_to_valid_edges + + +def reduce_max_depth_height(max_depth_height): + ''' + :param max_depth_height: new maximum depth/height to reduce publications in publication list to + :type max_depth_height: int + + function to remove all publications which are not in new maximum depth/height threshold + ''' + for pub in processed_input_list: + if (abs(pub.group) > max_depth_height): + processed_input_list.remove(pub) + +def get_old_height_depth(): + ''' + function to get old max height and max depth from previous construction call + ''' + max_height = 0 + max_depth = 0 + for pub in processed_input_list: + if (pub.group < 0): + max_depth = max(max_depth, abs(pub.group)) + if (pub.group > 0): + max_height = max(max_height, pub.group) + return(max_height, max_depth) + +def get_old_max_references(old_depth): + ''' + :param old_depth: old maximum depth to search for citations + :type old_depth: int + + function to get references for new recursive levels + ''' + old_max_references = [] + for pub in processed_input_list: + if (abs(pub.group) == old_depth): + old_max_references.append(pub.references) + return(old_max_references) + +def get_old_max_citations(old_height): + ''' + :param old_height: old maximum height to search for citations + :type old_height: int + + function to get citations for new recursive levels + ''' + old_max_citations = [] + for pub in processed_input_list: + if (abs(pub.group) == old_height): + old_max_citations.append(pub.citations) + return(old_max_citations) + 
def update_depth(obj_input_list, input_edges, new_depth, new_height, test_var):
    '''
        :param obj_input_list:  input list of publications of type Publication from update_graph
        :type obj_input_list:   List[Publication]

        :param input_edges:     list of edges from update_graph
        :type input_edges:      List

        :param new_depth:       new maximum depth to search for references
        :type new_depth:        int

        :param new_height:      new maximum height to search for citations
        :type new_height:       int

        :param test_var:        variable to differentiate between test and url call
        :type test_var:         boolean

        :return:                the adjusted publication list and edge list
        :rtype:                 Tuple[List[Publication], List]

        function to adjust old publication search depth to update call

        Both input lists are modified in place, so a caller that ignores the
        return value still sees the adjusted graph. Depth (references) and
        height (citations) are handled independently of each other.
    '''

    global processed_input_list, valid_edges
    processed_input_list = obj_input_list
    valid_edges = input_edges

    old_height, old_depth = get_old_height_depth()

    # removes publications and links from recursion levels which aren't needed anymore
    if old_depth > new_depth or old_height > new_height:
        # Negative groups belong to the reference side, positive groups to the
        # citation side, so each side is limited by its own threshold.
        # Slice assignment keeps the identity of the caller's list.
        processed_input_list[:] = [
            pub for pub in processed_input_list
            if -new_depth <= pub.group <= new_height
        ]
        # In-place update, otherwise only the module global would be rebound
        # and the caller would keep the edges of the removed publications.
        valid_edges[:] = back_to_valid_edges(processed_input_list, valid_edges)

    # adds publications and links for new recursion levels
    if old_depth < new_depth:
        old_max_references = get_old_max_references(old_depth)
        add_citations(processed_input_list, valid_edges, old_max_references,
                      old_depth + 1, new_depth, "Reference", test_var)
    if old_height < new_height:
        old_max_citations = get_old_max_citations(old_height)
        add_citations(processed_input_list, valid_edges, old_max_citations,
                      old_height + 1, new_height, "Citation", test_var)

    # adds edges between reference group and citation group of known publications
    # NOTE(review): complete_inner_edges takes no arguments here - confirm it
    # operates on the lists that were just handed to add_citations.
    complete_inner_edges()

    return (processed_input_list, valid_edges)
a/verarbeitung/update_graph/update_graph.py +++ b/verarbeitung/update_graph/update_graph.py @@ -14,16 +14,17 @@ __status__ = "Production" #__maintainer__ = "" -import sys -from pathlib import Path -from os import error +import sys + sys.path.append("../../") from input.publication import Publication from verarbeitung.get_pub_from_input import get_pub from .Knoten_Vergleich import doi_listen_vergleichen -from .update_graph_del import delete_nodes_and_edges +from .delete_nodes_edges import delete_nodes_and_edges from .connect_new_input import connect_old_and_new_input +from .update_depth import update_depth +from .import_from_json import input_from_json def get_old_input_dois(old_obj_input_list): @@ -65,10 +66,10 @@ def get_new_input_dois(new_input, test_var): return(new_input_dois) -def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, json_file, search_depth, search_height, test_var = False): +def update_graph(new_doi_input_list, json_file, search_depth, search_height, test_var = False): ''' :param new_doi_input_list: input list of doi from UI - :type new_doi_input_list: list of strings + :type new_doi_input_list: List[String] :param old_obj_input_list: list of publications retrieved from old json file :type old_obj_input_list: List[Publication] @@ -82,14 +83,14 @@ def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, function to compare old and new input, start node/edge removal and to return updated sets of nodes and edges ''' + # gets information from previous cunstruction call + old_obj_input_list , old_edges_list = input_from_json(json_file) + # one global list to save the process of removing unneeded publications and one to save valid edges - global processed_input_list, valid_edges - processed_input_list = old_obj_input_list.copy() + global processed_list, valid_edges + processed_list = old_obj_input_list valid_edges = [] - # save the return values of global lists - processed_input_list_del = [] - 
valid_edges_del = [] # get dois from lists to compare for differences old_doi_input_list = get_old_input_dois(old_obj_input_list) @@ -100,9 +101,11 @@ def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, # deletes publications and edges from node_list if publications can no longer be reached if (len(deleted_nodes) > 0): - processed_input_list_del, valid_edges_del = delete_nodes_and_edges(processed_input_list, common_nodes, old_edges_list) - + processed_list, valid_edges = delete_nodes_and_edges(processed_list, common_nodes, old_edges_list) + + update_depth(processed_list, valid_edges, search_depth, search_height, test_var) + if (len(inserted_nodes) > 0): connect_old_and_new_input(json_file, inserted_nodes, search_depth, search_height, test_var) - return(processed_input_list_del, valid_edges_del) + return(processed_list, valid_edges)