diff --git a/verarbeitung/Kanten_Vergleich.py b/verarbeitung/Kanten_Vergleich.py index 5e1cd41747f08a1cda10d9f735956809120b5139..0b3225d55c008adb328ffc0651b6f72f16bc59c1 100644 --- a/verarbeitung/Kanten_Vergleich.py +++ b/verarbeitung/Kanten_Vergleich.py @@ -1,24 +1,32 @@ #!/usr/bin/env python3 -def back_to_valid_edges(Kanten_aus_Json, Geloechte_Knoten): +def back_to_valid_edges(links_from_json, processed_input_list): ''' - :param Kanten_aus_Json: list of edges from the old graph - :type Kanten_aus_Json: list - :param Geloechte_Knoten: list of deleted nodes from the old graph - :type Geloechte_Knoten: list + :param links_from_json: list of edges from the old graph + :type links_from_json: list + :param processed_input_list: list pubs still in graph + :type processed_input_list: list function that deletes edges, if one ore two including nodes are deleted nodes ''' - list_of_edges_from_json = Kanten_aus_Json - list_of_valid_edges = list_of_edges_from_json - list_of_deleted_nodes = Geloechte_Knoten + list_of_valid_edges = links_from_json - for deleted_node in list_of_deleted_nodes: #iterates over all deleted nodes - for edge in list_of_edges_from_json: #iterates over all edges from old graph - for node in edge: #checks for both including nodes if one of them was delted - if node == deleted_node: # if one of them is a deleted node - list_of_valid_edges.remove(edge) #removes the edge - break #ist überflüssig, nur fürs verständnis + + #iterates over all edges from old graph + for edge in links_from_json: + + # counter for adjacent nodes + found_adj_nodes = 0 + for pub in processed_input_list: + # checks for both adjacent nodes of edge if pub is source/target node + for adj_node in edge: + # increases counter if adjacent node was found + if (adj_node == pub.doi_url): + found_adj_nodes += 1 + + #removes the edge if less than 2 adjacent nodes found + if (found_adj_nodes < 2): + list_of_valid_edges.remove(edge) return(list_of_valid_edges) diff --git 
a/verarbeitung/Knoten_Vergleich.py b/verarbeitung/Knoten_Vergleich.py index 37fc1671ebeae942e088508f88dabf30087d5cf5..55c10f2872ea3dcfb40aa30aac4b09aa01fe48a6 100644 --- a/verarbeitung/Knoten_Vergleich.py +++ b/verarbeitung/Knoten_Vergleich.py @@ -18,14 +18,14 @@ def doi_listen_vergleichen(alte,neue): all_dois = dois_from_old_graph + dois_from_new_graph for doi in all_dois: # iterates over the merged list of new and old dois - if ((Counter(all_dois)[doi]) == 2) & (doi not in common_nodes): # If the doi occurs twice the node is in the old and the new graph + if ((all_dois.count(doi) == 2) & (doi not in common_nodes)): # If the doi occurs twice the node is in the old and the new graph common_nodes.append(doi) #appends the doi to common ones, if its not alredy in it elif ((doi in dois_from_old_graph) & (doi not in dois_from_new_graph)): #If the doi occurs once and it is from old graph it is a deleted node deleted_nodes.append(doi) #appends the doi to deleted ones elif ((doi in dois_from_new_graph) & (doi not in dois_from_old_graph)): #if the doi occurs ince and it is from new graph it is a inserted node inserted_nodes.append(doi) #appends the doi to the inserted ones return(common_nodes, inserted_nodes, deleted_nodes) - + #Test Prints #liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"] diff --git a/verarbeitung/Processing.py b/verarbeitung/Processing.py index fb0d7fd72b720057a2c8fa292d03da5a39befd17..a3e245a6e337d089ec1fee75c1827a4be0df7ba3 100644 --- a/verarbeitung/Processing.py +++ b/verarbeitung/Processing.py @@ -27,18 +27,19 @@ from json_demo import output_to_json def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): ''' - :param doi_input_list: list with dois from user - :type doi_input_list: list - :param search_depth_max: recursion depth limit - :type search_depth_max: Integer - :param search_height_max: recursion height limit - :type search_height_max: Integer - :param test_var: Wert, um zu entscheiden, ob die 
Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - - # adds every publication from input list to graph structure - # doi_input_list: list of publication dois from user + :param doi_input_list: input list of doi from UI + :type doi_input_list: list of strings + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' + references_pub_obj_list = [] citations_pub_obj_list = [] @@ -84,40 +85,41 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t def complete_inner_edges(test_var): ''' - :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - - # adds edges between citation and reference group + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' - for node in nodes: #iterates over all nodes in the set of nodes - if (node.group == "depth"): #checks if the node has group depth (=is a reference from a paper) - for citation in node.citations: #iterates over the papers that this paper is cited by - for cit in nodes: #iterates over all nodes in set of nodes - if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): #checks if there is already a related node that is in the set of nodes - edges.append([citation.doi_url, node.doi_url]) # creates an edge between them - if (node.group == "height"): #checks if the node has group height (=is a citation from a paper) - for reference in node.references: #iterates over the papers that this is paper references - for ref in nodes: #iterates over all nodes 
in set of nodes - if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): #checks if there is already a related node that is in the set of nodes - edges.append([node.doi_url,reference.doi_url]) #creates an edge between them + for node in nodes: + if (node.group == "depth"): + for citation in node.citations: + for cit in nodes: + if (citation.doi_url == cit.doi_url and [citation.doi_url, node.doi_url] not in edges): + edges.append([citation.doi_url, node.doi_url]) + if (node.group == "height"): + for reference in node.references: + for ref in nodes: + if (reference.doi_url == ref.doi_url and [node.doi_url, reference.doi_url] not in edges): + edges.append([node.doi_url,reference.doi_url]) - + +# adds a node for every publication unknown +# adds edges for references between publications def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): ''' - :param pub: Paper - :type pub: Onbject of class Publication - :param search_depth: current recursion step - :type search_depth: integer - :param search_depth_max: recursion limit - :type search_depth_max: integer - :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - # adds a node for every publication unknown - # adds edges for references between publications - # returs a list of nodes + :param pub: publication which references will be added + :type pub: Class Publication + + :param search_depth: current depth to search for references + :type search_depth: int + + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' + references_pub_obj_list = [] for reference in pub.references: #iterates over the references of the considered paper not_in_nodes = True #boolean Value to ensure 
that there will be no dublicates in the set of nodes @@ -151,20 +153,25 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ return references_pub_obj_list +# recursive function to implement height-first-search on references +# references_pub_obj_list: input list of references as publication objects +# search_depth: current search_depth of height-first-search +# search_depth_max: maximal search_depth for dfs +def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): + ''' + :param references_pub_obj_list: list of publications which references will be added + :type references_pub_obj_list: list of objects of type Class Publications + + :param search_depth: current depth to search for references + :type search_depth: int -def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): + :param search_depth_max: maximum depth to search for references + :type search_depth_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' - :param references_pub_obj_list: input list of references as publication objects - :type references_pub_obj_list: liste - :param search_depth: current search_depth of height-first-search - :type search_depth: integer - :param search_depth_max: maximal search_depth for dfs - :type search_depth_max: integer - :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - - # recursive function to implement height-first-search on references - ''' + # adds next level to nodes/edges for pub in references_pub_obj_list: new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) @@ -179,18 +186,19 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m def 
create_graph_structure_citations(pub, search_height, search_height_max, test_var): ''' - :param pub: Paper - :type pub: Onbject of class Publication - :param search_height: current recursion step - :type search_height: integer - :param search_height_max: recursion limit - :type search_height_max: integer - :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - # adds a node for every publication unknown - # adds edges for citations between publications - # returns list of nodes + :param pub: publication which citations will be added + :type pub: Class Publication + + :param search_height: current height to search for citations + :type search_height_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' + citations_pub_obj_list = [] for citation in pub.citations: not_in_nodes = True @@ -224,20 +232,25 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test +# recursive function to implement height-first-search on citations +# citations_pub_obj_list: input list of citations as publication objects +# search_height: current search_height of height-first-search +# search_height_max: maximal search_height for dfs +def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): + ''' + :param citations_pub_obj_list: list of publications which citations will be added + :type citations_pub_obj_list: list of objects of type Class Publications + + :param search_height: current height to search for citations + :type search_height_max: int + + :param search_height_max: maximum height to search for citations + :type search_height_max: int -def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, 
test_var): + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' - :param references_pub_obj_list: input list of citations as publication objects - :type references_pub_obj_list: liste - :param search_height: current search_height of height-first-search - :type search_height: integer - :param search_height_max: maximal search_height for dfs - :type search_height_max: integer - :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - - # recursive function to implement height-first-search on citations - ''' + # adds next level to nodes/edges for pub in citations_pub_obj_list: new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) @@ -252,17 +265,19 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m def process_main(doi_input_list, search_height, search_depth, test_var = False): ''' - :param doi_input_list: list with dois from user - :type doi_input_list: list - :param search_height: recursion height - :type search_height: integer - :param search_depth: recursion depth - :type search_depth: integer - :param test_var: Wert, um zu entscheiden, ob die Test-Input-Funktion oder die Standartversion gewählt werden soll - :type test_var: Boolscher Wert (True = Test-Funkion, False = Standart-Funktion) - - # main function to call. 
Needs as input: + :param doi_input_list: input list of doi from UI + :type doi_input_list: list of strings + + :param search_height: maximum height to search for citations + :type search_height: int + + :param search_depth: maximum depth to search for references + :type search_depth: int + + :param test_var: variable to differenciate between test and url call + :type test_var: boolean ''' + # ERROR-Handling doi_array = NULL if (len(doi_input_list) == 0): print("Error, no input data") diff --git a/verarbeitung/__pycache__/Processing.cpython-39.pyc b/verarbeitung/__pycache__/Processing.cpython-39.pyc index da7bd263cdd2ab97aabc416b1431b1463fc2a03e..f16ff9fc08e6d1b3ae555bd65772c0b66c866779 100644 Binary files a/verarbeitung/__pycache__/Processing.cpython-39.pyc and b/verarbeitung/__pycache__/Processing.cpython-39.pyc differ diff --git a/verarbeitung/__pycache__/json_demo.cpython-39.pyc b/verarbeitung/__pycache__/json_demo.cpython-39.pyc index d78081e2f26f27652a5db45cae03cdb00034fe8f..20afc4f92dda6846dc720c9f3eb15bed60c69002 100644 Binary files a/verarbeitung/__pycache__/json_demo.cpython-39.pyc and b/verarbeitung/__pycache__/json_demo.cpython-39.pyc differ diff --git a/verarbeitung/import_from_json.py b/verarbeitung/import_from_json.py index 37c12c5631954dd0b383aa1c7a5bf362db9d8afc..9fe099f0e794933fdd12dc4b3bf85af290dd19dc 100644 --- a/verarbeitung/import_from_json.py +++ b/verarbeitung/import_from_json.py @@ -16,8 +16,9 @@ __status__ = "Production" import json #sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') +import sys +sys.path.append("../") from input.interface import InputInterface as Input -#import input class Publication: @@ -72,17 +73,23 @@ def add_ref_and_cit_to_pubs(input_dict): # iterates over the list of edges for edge in input_dict["links"]: - for node in list_of_nodes_py: - + for source in list_of_nodes_py: + for target in list_of_nodes_py: + if ((source.doi_url == edge["source"]) and (target.doi_url == 
edge["target"])): + new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date) + source.references.append(new_reference) + + new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date) + target.citations.append(new_citation) # adds reference to current node - if (node.doi_url == edge["source"]): - new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) - node.references.append(new_reference) + # if (node.doi_url == edge["source"]): + # new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) + # node.references.append(new_reference) - # adds citation to current node - if (node.doi_url == edge["target"]): - new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) - node.citations.append(new_citation) + # # adds citation to current node + # if (node.doi_url == edge["target"]): + # new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) + # node.citations.append(new_citation) # adds edge to list list_of_edges_py.append([edge["source"],edge["target"]]) @@ -111,3 +118,4 @@ def input_from_json(json_file): return(list_of_nodes_py, list_of_edges_py) + diff --git a/verarbeitung/json_text.json b/verarbeitung/json_text.json new file mode 100644 index 0000000000000000000000000000000000000000..81132f74e1d47b40f3fdb908fdd53884fccf133b --- /dev/null +++ b/verarbeitung/json_text.json @@ -0,0 +1 @@ +{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. 
Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00249", "group": "input", "citations": 5}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "group": "height", "citations": 2}, {"name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00741", "group": "height", "citations": 0}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "group": "height", "citations": 8}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. 
Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00250", "group": "height", "citations": 12}, {"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input", "citations": 1}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth", "citations": 14}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth", "citations": 9}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth", "citations": 100}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth", "citations": 48}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth", "citations": 32}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth", "citations": 100}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth", "citations": 100}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth", "citations": 100}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth", "citations": 25}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth", "citations": 12}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth", "citations": 100}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth", "citations": 74}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. 
Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth", "citations": 65}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth", "citations": 97}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth", "citations": 100}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth", "citations": 99}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth", "citations": 100}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. 
Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height", "citations": 0}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": 
"https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, 
{"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}]} \ No newline at end of file diff --git a/verarbeitung/print_graph_test.py b/verarbeitung/print_graph_test.py index ec3422b90dc06902afa4f98d102efb5004ee266d..3209485d6d8cd65449c7e151b54616d3cf67568e 100644 --- a/verarbeitung/print_graph_test.py +++ b/verarbeitung/print_graph_test.py @@ -19,9 +19,9 @@ import sys #sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') sys.path.append("../") from input.interface import InputInterface as Input -#import input from Processing import process_main from import_from_json import input_from_json +from update_graph import check_graph_updates # a function to print nodes and edges from a graph def print_graph(nodes, edges): @@ -33,6 +33,23 @@ def print_graph(nodes, edges): print(edge,"\n") print(len(nodes)) print(len(edges)) + print(" ") + +def print_extended_graph(nodes, edges): + print("Knoten:\n") + for node in nodes: + print(node.title, "\n") + print(node.doi_url) + for reference in node.references: + print(reference.doi_url) + for citation in node.citations: + print(citation.doi_url) + print("\nKanten:\n") + for edge in edges: + print(edge,"\n") + print(len(nodes)) + print(len(edges)) + print(" ") # program test with some random dois @@ -59,6 +76,26 @@ def try_known_publications(): return(nodes, edges) -nodes, edges = try_known_publications() +def try_delete_nodes(): + doi_list = [] + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203') + nodes, edges = process_main(doi_list,1,1) + 
print_graph(nodes, edges) + + list_of_nodes_py, list_of_edges_py = input_from_json('json_text.json') + doi_list = [] + doi_list.append('https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249') + valid_nodes, valid_edges = check_graph_updates(doi_list, list_of_nodes_py, list_of_edges_py) + print_graph(valid_nodes, valid_edges) + +def try_import(): + nodes, edges = input_from_json('json_text.json') + print_extended_graph(nodes,edges) + +#nodes, edges = try_known_publications() #nodes_new, edges_new = input_from_json("json_text.json") -#print_graph(nodes_new, edges_new) \ No newline at end of file +#print_graph(nodes_new, edges_new) +try_delete_nodes() + +#try_import() \ No newline at end of file diff --git a/verarbeitung/read_json.py b/verarbeitung/read_json.py deleted file mode 100644 index 7300d8fcdd80379bec084c31786f4532bc491a37..0000000000000000000000000000000000000000 --- a/verarbeitung/read_json.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Functions to read old json files to recreate old grpah structure - -""" - -__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski" -__email__ = "cis-project2021@zbh.uni-hamburg.de" -__status__ = "Production" -#__copyright__ = "" -#__credits__ = ["", "", "", ""] -#__license__ = "" -#__version__ = "" -#__maintainer__ = "" - -from bs4 import BeautifulSoup as bs -import requests as req -import sys -import json -from pathlib import Path -#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input') -from input.interface import InputInterface as Input -#import input -from input_test import input_test_func - - -class Publication: - #def __init__(self, doi_url, title, contributors, journal, publication_date, references, citations, group): - def __init__(self, doi_url, title, contributors, journal, publication_date, group): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = 
publication_date - #if references is None: - # self.references = [] - #else: - # self.references = ref(references) - #if citations is None: - # self.citations = [] - #else: - # self.citations = cit(citations) - self.group = group - - -class Citation: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -class Reference: - def __init__(self,doi_url, title, contributors, journal, publication_date): - self.doi_url = doi_url - self.title = title - self.contributors = contributors - self.journal = journal - self.publication_date = publication_date - -#def cit(list_doi): -# cits = [] -# for doi_url in list_doi: -# for array in list_of_arrays: -# if doi_url == array[0]: -# cits.append(Citation(array[0], array[1], array[2], array[3], array[4])) -# return cits - -#def ref(list_doi): -# refs = [] -# for doi_url in list_doi: -# for array in list_of_arrays: -# if doi_url == array[0]: -# refs.append(Citation(array[0], array[1], array[2], array[3], array[4])) -# return refs - - - -def read_json(): - with open('json_text.json','r') as file: - obj = json.load(file) - for node in obj["nodes"]: - pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node ["group"]) - nodes.append(pub) - print(pub.doi_url) - print(pub.title) - print(pub.journal) - print(pub.group) - print(" ") - #for edge in obj["edges"]: - #for cit_doi in nodes: - #if (edge[0] == cit_doi.doi_url): - #cit_doi.references.append() - -global nodes, edges -nodes = [] -edges = [] - -read_json() -#print(type(obj)) - -#nodes = obj["nodes"] -#for node in nodes: - - #print(node["doi"]) \ No newline at end of file diff --git a/verarbeitung/update_graph.py b/verarbeitung/update_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..571106805bb5952d7531289f8dbd1854d0792aca --- /dev/null +++ 
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals

"""

__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""

import sys
from pathlib import Path
sys.path.append("../")
from input.interface import InputInterface as Input
from input_test import input_test_func
from Knoten_Vergleich import doi_listen_vergleichen
from Kanten_Vergleich import back_to_valid_edges


def get_old_input_dois(old_obj_input_list):
    '''
        :param old_obj_input_list:  publications of the old graph
        :type old_obj_input_list:   list

        returns the doi urls of all publications whose group is "input"
    '''
    return [pub.doi_url for pub in old_obj_input_list if pub.group == "input"]


def get_new_input_dois(new_input, test_var):
    '''
        :param new_input:   dois passed in for the updated graph
        :type new_input:    list

        :param test_var:    if true, publications are built with input_test_func
                            instead of being fetched through the input interface
        :type test_var:     boolean

        returns the doi urls of all entries that could be turned into a publication
    '''
    doi_input_list = []
    for new_node in new_input:
        if test_var:
            pub = input_test_func(new_node)
        else:
            inter = Input()
            try:
                pub = inter.get_publication(new_node)
            except (ValueError, IndexError):
                # presumably raised for dois the interface cannot resolve
                # (TODO confirm); such entries are skipped
                continue
        # NOTE(review): placed at loop level so it covers both branches;
        # confirm against the original indentation
        doi_input_list.append(pub.doi_url)
    return doi_input_list


def _delete_linked_nodes_rec(pub, link_attr, seen_dois):
    '''
        :param pub:         publication whose linked publications get deleted
        :type pub:          publication object with doi_url, group and link lists

        :param link_attr:   name of the link list to follow ("references" or "citations")
        :type link_attr:    str

        :param seen_dois:   doi urls that were already expanded during this deletion
        :type seen_dois:    set

        follows the links of pub recursively and removes every reached publication
        that is not in group "input" from the global processed_input_list
    '''
    # a doi is expanded only once, so cyclic links cannot recurse forever
    if pub.doi_url in seen_dois:
        return
    seen_dois.add(pub.doi_url)

    for linked in getattr(pub, link_attr):
        # iterate over a snapshot: the recursive calls remove entries from
        # processed_input_list, which must not happen to a list being iterated
        for candidate in list(processed_input_list):
            if candidate.doi_url == linked.doi_url:
                _delete_linked_nodes_rec(candidate, link_attr, seen_dois)

    # input publications are kept here; the caller decides about them
    if pub.group != "input" and pub in processed_input_list:
        processed_input_list.remove(pub)


def delete_ref_nodes_rec(pub):
    '''
        :param pub: publication whose references get deleted
        :type pub:  publication object

        removes all publications reachable from pub via references from the global
        processed_input_list (pub itself too, unless its group is "input");
        check_graph_updates has to set processed_input_list beforehand
    '''
    _delete_linked_nodes_rec(pub, "references", set())


def delete_cit_nodes_rec(pub):
    '''
        :param pub: publication whose citations get deleted
        :type pub:  publication object

        removes all publications reachable from pub via citations from the global
        processed_input_list (pub itself too, unless its group is "input");
        check_graph_updates has to set processed_input_list beforehand
    '''
    _delete_linked_nodes_rec(pub, "citations", set())


def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False):
    '''
        :param new_doi_input_list:  dois the updated graph should be built from
        :type new_doi_input_list:   list

        :param old_obj_input_list:  publications of the old graph
        :type old_obj_input_list:   list

        :param old_edges_list:      edges of the old graph
        :type old_edges_list:       list

        :param test_var:            if true, test publications are used instead of fetched ones
        :type test_var:             boolean

        compares old and new input dois and deletes the nodes (and their edges) that
        belong to input dois which are no longer requested.
        returns the remaining publications and the remaining edges.

        old_obj_input_list is not copied: the deletions are applied to the passed
        list itself, which is also the list that gets returned.
    '''
    global processed_input_list, valid_edges
    processed_input_list = old_obj_input_list
    valid_edges = []

    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)
    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    # NOTE(review): only pure deletions are handled; as soon as a new input doi
    # was inserted nothing is removed and the returned edge list stays empty.
    # The placement of the edge filtering inside this branch is reconstructed
    # and should be confirmed.
    if not inserted_nodes:
        deleted_dois = set(deleted_nodes)
        # collect the affected publications first, because the deletions below
        # shrink processed_input_list and it must not be iterated meanwhile
        pubs_to_delete = [pub for pub in processed_input_list if pub.doi_url in deleted_dois]
        for pub in pubs_to_delete:
            delete_ref_nodes_rec(pub)
            delete_cit_nodes_rec(pub)
            # the helpers keep "input" publications, so the deleted input
            # itself is removed here (guarded in case it is already gone)
            if pub in processed_input_list:
                processed_input_list.remove(pub)
        valid_edges = back_to_valid_edges(old_edges_list, processed_input_list)

    return(processed_input_list, valid_edges)