diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index 0dcc7391bd5a633a86841f6097f486017ae94dfa..54ee02df407398303dd10cccfb2660499dc2ac59 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -17,7 +17,9 @@ from bs4 import BeautifulSoup as bs import requests as req import sys from pathlib import Path -from input_fj import input +#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +from input.interface import InputInterface as Input +#import input from input_test import input_test_func from json_demo import output_to_json @@ -33,7 +35,15 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t if(test_var): pub = input_test_func(pub_doi) else: - pub = input(pub_doi) + #print(pub_doi) + inter = Input() + try: + pub = inter.get_publication(pub_doi) + except ValueError: + continue + except IndexError: + print(pub_doi) + # checks if publication already exists in nodes not_in_nodes = True @@ -94,7 +104,15 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ if (test_var): reference_pub_obj = input_test_func(reference.doi_url) else: - reference_pub_obj = input(reference.doi_url) + #reference_pub_obj = Input(reference.doi_url) + inter = Input() + try: + reference_pub_obj = inter.get_publication(reference.doi_url) + except ValueError: + continue + # nur aus Testzwecken, da noch was bei Input falsch ist + except IndexError: + print(reference.doi_url) reference_pub_obj.group = "depth" nodes.append(reference_pub_obj) @@ -141,7 +159,14 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test if (test_var): citation_pub_obj = input_test_func(citation.doi_url) else: - citation_pub_obj = input(citation.doi_url) + #citation_pub_obj = Input(citation.doi_url) + inter = Input() + try: + citation_pub_obj = inter.get_publication(citation.doi_url) + except ValueError: + continue + except IndexError: + print(citation.doi_url) 
citation_pub_obj.group = "height" nodes.append(citation_pub_obj) @@ -231,9 +256,11 @@ def print_graph(nodes, edges): # program test, because there is no connection to UI yet. def try_known_publications(): doi_list = [] - doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + #doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + #doi_list.append('https://doi.org/10.1021/acs.jcim.9b00249') + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') #arr.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') - doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') + #doi_list.append('https://doi.org/10.1021/acs.jmedchem.0c01332') #arr.append('https://doi.org/10.1021/acs.jcim.0c00741') #arr.append('https://doi.org/10.1021/ci700007b') @@ -242,6 +269,8 @@ def try_known_publications(): #arr.append[url] - nodes,edges = process_main(doi_list,2,2) + process_main(doi_list,1,1) - print_graph(nodes, edges) \ No newline at end of file + print_graph(nodes, edges) + +#try_known_publications() \ No newline at end of file diff --git a/verarbeitung/__pycache__/Processing.cpython-38.pyc b/verarbeitung/__pycache__/Processing.cpython-38.pyc index 63ac529316c848e829cd83ef44ec749e5903bf9e..1906483bf8be5183bfad874433aca0cd4a75a8fa 100644 Binary files a/verarbeitung/__pycache__/Processing.cpython-38.pyc and b/verarbeitung/__pycache__/Processing.cpython-38.pyc differ diff --git a/verarbeitung/__pycache__/input_test.cpython-38.pyc b/verarbeitung/__pycache__/input_test.cpython-38.pyc index df395212453392e135532b12396cd4c30a92ea05..35b42ad4c56ad3a65838c0ccc2716b9aea899b5b 100644 Binary files a/verarbeitung/__pycache__/input_test.cpython-38.pyc and b/verarbeitung/__pycache__/input_test.cpython-38.pyc differ diff --git a/verarbeitung/__pycache__/input_test.cpython-39.pyc b/verarbeitung/__pycache__/input_test.cpython-39.pyc index 68e42fd6a47a02787524c68816a42574834931d2..604973a2f2c133e5085aba44dcabe0ac4fa9ac05 100644 Binary files 
a/verarbeitung/__pycache__/input_test.cpython-39.pyc and b/verarbeitung/__pycache__/input_test.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-38.pyc b/verarbeitung/__pycache__/json_demo.cpython-38.pyc index 4a1e7ba987775a20fddaa4a8f846bb238670d6a1..4227be8f77a3c09fba1e059e2f19b5109e8f7ab7 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-38.pyc and b/verarbeitung/__pycache__/json_demo.cpython-38.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index 4e31ce337645d5282ddab11668bc6d745735f9f8..453d99172fe839affc06d41a37b8e18f6de3b08c 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and b/verarbeitung/__pycache__/json_demo.cpython-39.pyc differ diff --git a/verarbeitung/json_demo.py b/verarbeitung/json_demo.py index b9f618d1a2dcac13ca51a530f365d40aa226bc11..bbfce9f14757a1ee09683785b9680ac04daee346 100644 --- a/verarbeitung/json_demo.py +++ b/verarbeitung/json_demo.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import json -from input_fj import input """ Functions that format the computed graph to match the interface to the output-part @@ -46,4 +45,3 @@ def output_to_json(V,E): #knoten = ["doi1", "doi2", "doi3"] #kanten = [[1,2],[3,4],[5,6]] #output_to_json(knoten,kanten) - diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json new file mode 100644 index 0000000000000000000000000000000000000000..1a8955785fbc440694de111e7dc470e287373e5e --- /dev/null +++ b/verarbeitung/json_text.json @@ -0,0 +1 @@ +{"nodes": [{"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. 
Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input"}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth"}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth"}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth"}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth"}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth"}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth"}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth"}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth"}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth"}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. 
Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth"}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth"}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth"}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth"}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. 
Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth"}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth"}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth"}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth"}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. 
Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth"}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth"}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth"}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. 
Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height"}], "links": [{"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": 
"https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}]} \ No newline at end of file diff --git a/verarbeitung/read_json.py b/verarbeitung/read_json.py new file mode 100644 index 0000000000000000000000000000000000000000..ee7b2069dc2e3dde2941af1dd91f4dd9c91e7504 --- /dev/null +++ b/verarbeitung/read_json.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +""" +Functions to read old json files to recreate old grpah structure + +""" + +__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" +__email__ = "cis-project2021@zbh.uni-hamburg.de" +__status__ = "Production" +#__copyright__ 
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

import json
import sys
from pathlib import Path


# Module-level graph state that read_json() fills (in place) and returns.
nodes = []
edges = []


class Publication:
    """Graph node restored from a saved JSON file.

    Attributes:
        doi_url: DOI link of the publication (JSON key "doi").
        title: title of the publication (JSON key "name").
        contributors: author names (JSON key "author").
        journal: journal name (JSON key "journal").
        publication_date: date string exactly as stored (JSON key "year").
        group: node group as stored in the file (JSON key "group").
        references: Reference objects for the links that start at this node;
            empty until read_json() has processed the links.
        citations: Citation objects for the links that end at this node;
            empty until read_json() has processed the links.
    """

    def __init__(self, doi_url, title, contributors, journal, publication_date, group):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        self.group = group
        # Not part of the node record in the file; rebuilt from the links.
        self.references = []
        self.citations = []


class Citation:
    """Metadata of a publication that cites another publication."""

    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date


class Reference:
    """Metadata of a publication that is referenced by another publication."""

    def __init__(self, doi_url, title, contributors, journal, publication_date):
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date


def _link_endpoints(link):
    """Return (source_doi, target_doi) for one stored link.

    Accepts both the {"source": ..., "target": ...} objects and plain
    [source, target] pairs.
    """
    if isinstance(link, dict):
        return link["source"], link["target"]
    return link[0], link[1]


def _stub_for(stub_cls, doi_url, pubs_by_doi):
    """Build a stub_cls (Reference or Citation) object for doi_url.

    Metadata is copied from the first loaded node with that DOI. If the DOI
    does not belong to any loaded node, only doi_url is set and the other
    fields are None.
    """
    matches = pubs_by_doi.get(doi_url)
    if not matches:
        return stub_cls(doi_url, None, None, None, None)
    pub = matches[0]
    return stub_cls(pub.doi_url, pub.title, pub.contributors,
                    pub.journal, pub.publication_date)


def read_json(json_path='json_text.json', *, verbose=True):
    """Recreate a previously exported graph from a JSON file.

    The file must contain a "nodes" list whose entries carry the keys "doi",
    "name", "author", "journal", "year" and "group". Links are read from the
    "links" list (falling back to "edges" when "links" is absent); every link
    is either a {"source": doi, "target": doi} object or a [source, target]
    pair.

    NOTE(review): a link is interpreted as "source cites target", i.e. the
    target becomes a reference of the source and the source a citation of the
    target. This matches the sample file, where the "input" node is the source
    of every link to a "depth" node - confirm against the writer of the file.

    Args:
        json_path: path of the JSON file; defaults to 'json_text.json' in the
            current working directory.
        verbose: when true, print DOI, title, journal and group of every
            loaded node.

    Returns:
        The module-level ``(nodes, edges)`` lists. ``nodes`` holds one
        Publication per stored node (duplicates in the file are kept),
        ``edges`` holds one ``[source_doi, target_doi]`` pair per stored link.
        Both lists are replaced in place, so calling the function again does
        not accumulate entries.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if "nodes" or one of the required keys is missing.
    """
    obj = json.loads(Path(json_path).read_text(encoding='utf-8'))

    loaded_nodes = []
    pubs_by_doi = {}
    for node in obj["nodes"]:
        pub = Publication(node["doi"], node["name"], node["author"],
                          node["journal"], node["year"], node["group"])
        loaded_nodes.append(pub)
        # The same DOI may be stored more than once; remember all of them.
        pubs_by_doi.setdefault(pub.doi_url, []).append(pub)
        if verbose:
            print(pub.doi_url)
            print(pub.title)
            print(pub.journal)
            print(pub.group)
            print(" ")

    stored_links = obj["links"] if "links" in obj else obj.get("edges", [])
    loaded_edges = []
    for link in stored_links:
        source_doi, target_doi = _link_endpoints(link)
        loaded_edges.append([source_doi, target_doi])
        for citing_pub in pubs_by_doi.get(source_doi, []):
            citing_pub.references.append(
                _stub_for(Reference, target_doi, pubs_by_doi))
        for cited_pub in pubs_by_doi.get(target_doi, []):
            cited_pub.citations.append(
                _stub_for(Citation, source_doi, pubs_by_doi))

    # Mutate the shared lists instead of rebinding them so that references
    # obtained through "from read_json import nodes, edges" stay valid.
    nodes[:] = loaded_nodes
    edges[:] = loaded_edges
    return nodes, edges


if __name__ == "__main__":
    # Only load the default file when run as a script; importing the module
    # must not touch the file system.
    read_json()