Skip to content
Snippets Groups Projects
Commit 07f3794a authored by Malte Schokolowski's avatar Malte Schokolowski
Browse files

worked on input reading

parent 8e31975f
No related branches found
No related tags found
1 merge request!11merge verarbeitung to main repo
#!/usr/bin/env python3 #!/usr/bin/env python3
def back_to_valid_edges(links_from_json, processed_input_list):
    '''
    :param links_from_json: list of edges from the old graph, each edge being
        a sequence of the doi urls of its adjacent nodes
    :type links_from_json: list

    :param processed_input_list: list of publications still in the graph
    :type processed_input_list: list

    :return: links_from_json itself, reduced to the edges whose adjacent
        nodes are all still part of the graph
    :rtype: list

    function that deletes every edge with at least one adjacent node that is
    no longer in the graph
    '''
    # one pass over the publications instead of one pass per edge
    remaining_dois = {pub.doi_url for pub in processed_input_list}

    # The surviving edges are collected first and written back afterwards.
    # Calling remove() on the list while looping over it skips the element
    # that follows every removed edge, so invalid edges were left behind.
    list_of_valid_edges = [
        edge for edge in links_from_json
        if all(adj_node in remaining_dois for adj_node in edge)
    ]

    # slice assignment keeps the in-place contract: the caller's list object
    # is updated and returned, as before
    links_from_json[:] = list_of_valid_edges
    return(links_from_json)
......
...@@ -18,14 +18,14 @@ def doi_listen_vergleichen(alte,neue): ...@@ -18,14 +18,14 @@ def doi_listen_vergleichen(alte,neue):
all_dois = dois_from_old_graph + dois_from_new_graph all_dois = dois_from_old_graph + dois_from_new_graph
for doi in all_dois: # iterates over the merged list of new and old dois for doi in all_dois: # iterates over the merged list of new and old dois
if ((Counter(all_dois)[doi]) == 2) & (doi not in common_nodes): # If the doi occurs twice the node is in the old and the new graph if ((all_dois.count(doi) == 2) & (doi not in common_nodes)): # If the doi occurs twice the node is in the old and the new graph
common_nodes.append(doi) #appends the doi to common ones, if its not alredy in it common_nodes.append(doi) #appends the doi to common ones, if its not alredy in it
elif ((doi in dois_from_old_graph) & (doi not in dois_from_new_graph)): #If the doi occurs once and it is from old graph it is a deleted node elif ((doi in dois_from_old_graph) & (doi not in dois_from_new_graph)): #If the doi occurs once and it is from old graph it is a deleted node
deleted_nodes.append(doi) #appends the doi to deleted ones deleted_nodes.append(doi) #appends the doi to deleted ones
elif ((doi in dois_from_new_graph) & (doi not in dois_from_old_graph)): #if the doi occurs ince and it is from new graph it is a inserted node elif ((doi in dois_from_new_graph) & (doi not in dois_from_old_graph)): #if the doi occurs ince and it is from new graph it is a inserted node
inserted_nodes.append(doi) #appends the doi to the inserted ones inserted_nodes.append(doi) #appends the doi to the inserted ones
return(common_nodes, inserted_nodes, deleted_nodes) return(common_nodes, inserted_nodes, deleted_nodes)
#Test Prints #Test Prints
#liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"] #liste_1 = ["doi_1","doi_2","doi_3","doi_4","doi_5"]
......
...@@ -27,6 +27,20 @@ from json_demo import output_to_json ...@@ -27,6 +27,20 @@ from json_demo import output_to_json
# adds every publication from input list to graph structure # adds every publication from input list to graph structure
# doi_input_list: list of publication dois from user # doi_input_list: list of publication dois from user
def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var): def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, test_var):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: list of strings
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
references_pub_obj_list = [] references_pub_obj_list = []
citations_pub_obj_list = [] citations_pub_obj_list = []
...@@ -73,6 +87,11 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t ...@@ -73,6 +87,11 @@ def initialize_nodes_list(doi_input_list, search_depth_max, search_height_max, t
# adds edges between citation and reference group # adds edges between citation and reference group
def complete_inner_edges(test_var): def complete_inner_edges(test_var):
'''
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
for node in nodes: for node in nodes:
if (node.group == "depth"): if (node.group == "depth"):
for citation in node.citations: for citation in node.citations:
...@@ -90,6 +109,20 @@ def complete_inner_edges(test_var): ...@@ -90,6 +109,20 @@ def complete_inner_edges(test_var):
# adds a node for every publication unknown # adds a node for every publication unknown
# adds edges for references between publications # adds edges for references between publications
def create_graph_structure_references(pub, search_depth, search_depth_max, test_var): def create_graph_structure_references(pub, search_depth, search_depth_max, test_var):
'''
:param pub: publication whose references will be added
:type pub: Class Publication
:param search_depth: current depth to search for references
:type search_depth: int
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
references_pub_obj_list = [] references_pub_obj_list = []
for reference in pub.references: for reference in pub.references:
not_in_nodes = True not_in_nodes = True
...@@ -131,6 +164,20 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_ ...@@ -131,6 +164,20 @@ def create_graph_structure_references(pub, search_depth, search_depth_max, test_
# search_depth: current search_depth of height-first-search # search_depth: current search_depth of height-first-search
# search_depth_max: maximal search_depth for dfs # search_depth_max: maximal search_depth for dfs
def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var): def process_references_rec(references_pub_obj_list, search_depth, search_depth_max, test_var):
'''
:param references_pub_obj_list: list of publications whose references will be added
:type references_pub_obj_list: list of objects of type Class Publications
:param search_depth: current depth to search for references
:type search_depth: int
:param search_depth_max: maximum depth to search for references
:type search_depth_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
# adds next level to nodes/edges # adds next level to nodes/edges
for pub in references_pub_obj_list: for pub in references_pub_obj_list:
new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var) new_reference_pub_obj_list = create_graph_structure_references(pub, search_depth, search_depth_max, test_var)
...@@ -145,6 +192,20 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m ...@@ -145,6 +192,20 @@ def process_references_rec(references_pub_obj_list, search_depth, search_depth_m
# adds a node for every publication unknown # adds a node for every publication unknown
# adds edges for citations between publications # adds edges for citations between publications
def create_graph_structure_citations(pub, search_height, search_height_max, test_var): def create_graph_structure_citations(pub, search_height, search_height_max, test_var):
'''
:param pub: publication whose citations will be added
:type pub: Class Publication
:param search_height: current height to search for citations
:type search_height: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
citations_pub_obj_list = [] citations_pub_obj_list = []
for citation in pub.citations: for citation in pub.citations:
not_in_nodes = True not_in_nodes = True
...@@ -186,6 +247,20 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test ...@@ -186,6 +247,20 @@ def create_graph_structure_citations(pub, search_height, search_height_max, test
# search_height: current search_height of height-first-search # search_height: current search_height of height-first-search
# search_height_max: maximal search_height for dfs # search_height_max: maximal search_height for dfs
def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var): def process_citations_rec(citations_pub_obj_list, search_height, search_height_max, test_var):
'''
:param citations_pub_obj_list: list of publications whose citations will be added
:type citations_pub_obj_list: list of objects of type Class Publications
:param search_height: current height to search for citations
:type search_height: int
:param search_height_max: maximum height to search for citations
:type search_height_max: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
# adds next level to nodes/edges # adds next level to nodes/edges
for pub in citations_pub_obj_list: for pub in citations_pub_obj_list:
new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var) new_citation_pub_obj_list = create_graph_structure_citations(pub, search_height, search_height_max, test_var)
...@@ -203,6 +278,20 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m ...@@ -203,6 +278,20 @@ def process_citations_rec(citations_pub_obj_list, search_height, search_height_m
# search_depth: max search depth to process to # search_depth: max search depth to process to
# test_var: only needed for unit test as True, default is False # test_var: only needed for unit test as True, default is False
def process_main(doi_input_list, search_height, search_depth, test_var = False): def process_main(doi_input_list, search_height, search_depth, test_var = False):
'''
:param doi_input_list: input list of doi from UI
:type doi_input_list: list of strings
:param search_height: maximum height to search for citations
:type search_height: int
:param search_depth: maximum depth to search for references
:type search_depth: int
:param test_var: variable to differentiate between test and url call
:type test_var: boolean
'''
# ERROR-Handling doi_array = NULL # ERROR-Handling doi_array = NULL
if (len(doi_input_list) == 0): if (len(doi_input_list) == 0):
print("Error, no input data") print("Error, no input data")
......
No preview for this file type
No preview for this file type
...@@ -72,17 +72,23 @@ def add_ref_and_cit_to_pubs(input_dict): ...@@ -72,17 +72,23 @@ def add_ref_and_cit_to_pubs(input_dict):
# iterates over the list of edges # iterates over the list of edges
for edge in input_dict["links"]: for edge in input_dict["links"]:
for node in list_of_nodes_py: for source in list_of_nodes_py:
for target in list_of_nodes_py:
if ((source.doi_url == edge["source"]) and (target.doi_url == edge["target"])):
new_reference = Reference(target.doi_url, target.title, target.contributors, target.journal, target.publication_date)
source.references.append(new_reference)
new_citation = Citation(source.doi_url, source.title, source.contributors, source.journal, source.publication_date)
target.citations.append(new_citation)
# adds reference to current node # adds reference to current node
if (node.doi_url == edge["source"]): # if (node.doi_url == edge["source"]):
new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) # new_reference = Reference(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
node.references.append(new_reference) # node.references.append(new_reference)
# adds citation to current node # # adds citation to current node
if (node.doi_url == edge["target"]): # if (node.doi_url == edge["target"]):
new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date) # new_citation = Citation(node.doi_url, node.title, node.contributors, node.journal, node.publication_date)
node.citations.append(new_citation) # node.citations.append(new_citation)
# adds edge to list # adds edge to list
list_of_edges_py.append([edge["source"],edge["target"]]) list_of_edges_py.append([edge["source"],edge["target"]])
...@@ -111,3 +117,4 @@ def input_from_json(json_file): ...@@ -111,3 +117,4 @@ def input_from_json(json_file):
return(list_of_nodes_py, list_of_edges_py) return(list_of_nodes_py, list_of_edges_py)
{"nodes": [{"name": "Comparing Molecular Patterns Using the Example of SMARTS: Applications and Filter Collection Analysis", "author": ["Emanuel S. R. Ehmki", "Robert Schmidt", "Farina Ohm", "Matthias Rarey"], "year": "May 24, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00249", "group": "input", "citations": 5}, {"name": "Combining Machine Learning and Computational Chemistry for Predictive Insights Into Chemical Systems", "author": ["John A. Keith", "Valentin Vassilev-Galindo", "Bingqing Cheng", "Stefan Chmiela", "Michael Gastegger", "Klaus-Robert M\u00fcller", "Alexandre Tkatchenko"], "year": "July 7, 2021", "journal": "Chem. Rev.", "doi": "https://doi.org/10.1021/acs.chemrev.1c00107", "group": "height", "citations": 2}, {"name": "Disconnected Maximum Common Substructures under Constraints", "author": ["Robert Schmidt", "Florian Krull", "Anna Lina Heinzke", "Matthias Rarey"], "year": "December 16, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00741", "group": "height", "citations": 0}, {"name": "Evolution of Novartis\u2019 Small Molecule Screening Deck Design", "author": ["Ansgar Schuffenhauer", "Nadine Schneider", "Samuel Hintermann", "Douglas Auld", "Jutta Blank", "Simona Cotesta", "Caroline Engeloch", "Nikolas Fechner", "Christoph Gaul", "Jerome Giovannoni", "Johanna Jansen", "John Joslin", "Philipp Krastel", "Eugen Lounkine", "John Manchester", "Lauren G. Monovich", "Anna Paola Pelliccioli", "Manuel Schwarze", "Michael D. Shultz", "Nikolaus Stiefl", "Daniel K. Baeschlin"], "year": "November 3, 2020", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.0c01332", "group": "height", "citations": 8}, {"name": "Comparing Molecular Patterns Using the Example of SMARTS: Theory and Algorithms", "author": ["Robert Schmidt", "Emanuel S. R. 
Ehmki", "Farina Ohm", "Hans-Christian Ehrlich", "Andriy Mashychev", "Matthias Rarey"], "year": "May 23, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00250", "group": "height", "citations": 12}, {"name": "AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings", "author": ["Jerome Eberhardt", "Diogo Santos-Martins", "Andreas F. Tillack", "Stefano Forli"], "year": "July 19, 2021", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.1c00203", "group": "input", "citations": 1}, {"name": "Accelerating AutoDock4 with GPUs and Gradient-Based Local Search", "author": ["Diogo Santos-Martins", "Leonardo Solis-Vasquez", "Andreas F Tillack", "Michel F Sanner", "Andreas Koch", "Stefano Forli"], "year": "January 6, 2021", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.0c01006", "group": "depth", "citations": 14}, {"name": "Docking Flexible Cyclic Peptides with AutoDock CrankPep", "author": ["Yuqi Zhang", "Michel F. Sanner"], "year": "September 11, 2019", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.9b00557", "group": "depth", "citations": 9}, {"name": "Lessons Learned in Empirical Scoring with smina from the CSAR 2011 Benchmarking Exercise", "author": ["David Ryan Koes", "Matthew P. Baumgartner", "Carlos J. Camacho"], "year": "February 4, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300604z", "group": "depth", "citations": 100}, {"name": "Vina-Carb: Improving Glycosidic Angles during Carbohydrate Docking", "author": ["Anita K. Nivedha", "David F. Thieker", "Spandana Makeneni", "Huimin Hu", "Robert J. 
Woods"], "year": "January 8, 2016", "journal": "Journal of Chemical Theory and Computation", "doi": "https://doi.org/10.1021/acs.jctc.5b00834", "group": "depth", "citations": 48}, {"name": "Lennard-Jones Potential and Dummy Atom Settings to Overcome the AUTODOCK Limitation in Treating Flexible Ring Systems", "author": ["Stefano Forli", "Maurizio Botta"], "year": "June 22, 2007", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci700036j", "group": "depth", "citations": 32}, {"name": "AutoDock4Zn: An Improved AutoDock Force Field for Small-Molecule Docking to Zinc Metalloproteins", "author": ["Diogo Santos-Martins", "Stefano Forli", "Maria Jo\u00e3o Ramos", "Arthur J. Olson"], "year": "June 15, 2014", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci500209e", "group": "depth", "citations": 100}, {"name": "A Force Field with Discrete Displaceable Waters and Desolvation Entropy for Hydrated Ligand Docking", "author": ["Stefano Forli", "Arthur J. Olson"], "year": "December 9, 2011", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm2005145", "group": "depth", "citations": 100}, {"name": "Consensus Docking: Improving the Reliability of Docking in a Virtual Screening Context", "author": ["Douglas R. Houston", "Malcolm D. Walkinshaw"], "year": "January 27, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300399w", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp9723574", "group": "depth", "citations": 100}, {"name": "Inhomogeneous Fluid Approach to Solvation Thermodynamics. 2. 
Applications to Simple Fluids", "author": ["Themis Lazaridis"], "year": "April 14, 1998", "journal": "Journal of Physical Chemistry B", "doi": "https://doi.org/10.1021/jp972358w", "group": "depth", "citations": 100}, {"name": "ZINC20\u2014A Free Ultralarge-Scale Chemical Database for Ligand Discovery", "author": ["John J. Irwin", "Khanh G. Tang", "Jennifer Young", "Chinzorig Dandarchuluun", "Benjamin R. Wong", "Munkhzul Khurelbaatar", "Yurii S. Moroz", "John Mayfield", "Roger A. Sayle"], "year": "October 29, 2020", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.0c00675", "group": "depth", "citations": 25}, {"name": "Structural Biology-Inspired Discovery of Novel KRAS\u2013PDE\u03b4 Inhibitors", "author": ["Yan Jiang", "Chunlin Zhuang", "Long Chen", "Junjie Lu", "Guoqiang Dong", "Zhenyuan Miao", "Wannian Zhang", "Jian Li", "Chunquan Sheng"], "year": "September 20, 2017", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/acs.jmedchem.7b01243", "group": "depth", "citations": 12}, {"name": "Directory of Useful Decoys, Enhanced (DUD-E): Better Ligands and Decoys for Better Benchmarking", "author": ["Michael M. Mysinger", "Michael Carchia", "John. J. Irwin", "Brian K. Shoichet"], "year": "June 20, 2012", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm300687e", "group": "depth", "citations": 100}, {"name": "Evaluation of AutoDock and AutoDock Vina on the CASF-2013 Benchmark", "author": ["Thomas Gaillard"], "year": "July 10, 2018", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.8b00312", "group": "depth", "citations": 74}, {"name": "Autodock Vina Adopts More Accurate Binding Poses but Autodock4 Forms Better Binding Affinity", "author": ["Nguyen Thanh Nguyen", "Trung Hai Nguyen", "T. Ngoc Han Pham", "Nguyen Truong Huy", "Mai Van Bay", "Minh Quan Pham", "Pham Cam Nam", "Van V. 
Vu", "Son Tung Ngo"], "year": "December 30, 2019", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/acs.jcim.9b00778", "group": "depth", "citations": 65}, {"name": "Glide:\u2009 A New Approach for Rapid, Accurate Docking and Scoring. 1. Method and Assessment of Docking Accuracy", "author": ["Richard A. Friesner", "Jay L. Banks", "Robert B. Murphy", "Thomas A. Halgren", "Jasna J. Klicic", "Daniel T. Mainz", "Matthew P. Repasky", "Eric H. Knoll", "Mee Shelley", "Jason K. Perry", "David E. Shaw", "Perry Francis", "Peter S. Shenkin"], "year": "February 27, 2004", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm0306430", "group": "depth", "citations": 97}, {"name": "Surflex:\u2009 Fully Automatic Flexible Molecular Docking Using a Molecular Similarity-Based Search Engine", "author": ["Ajay N. Jain"], "year": "January 21, 2003", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm020406h", "group": "depth", "citations": 100}, {"name": "ID-Score: A New Empirical Scoring Function Based on a Comprehensive Set of Descriptors Related to Protein\u2013Ligand Interactions", "author": ["Guo-Bo Li", "Ling-Ling Yang", "Wen-Jing Wang", "Lin-Li Li", "Sheng-Yong Yang"], "year": "February 9, 2013", "journal": "Journal of Chemical Information and Modeling", "doi": "https://doi.org/10.1021/ci300493w", "group": "depth", "citations": 99}, {"name": "A Knowledge-Based Energy Function for Protein\u2212Ligand, Protein\u2212Protein, and Protein\u2212DNA Complexes", "author": ["Chi Zhang", "Song Liu", "Qianqian Zhu", "Yaoqi Zhou"], "year": "February 16, 2005", "journal": "Journal of Medicinal Chemistry", "doi": "https://doi.org/10.1021/jm049314d", "group": "depth", "citations": 100}, {"name": "Novel Anti-Hepatitis B Virus Activity of Euphorbia schimperi and Its Quercetin and Kaempferol Derivatives", "author": ["Mohammad K. Parvez", "Sarfaraz Ahmed", "Mohammed S. Al-Dosari", "Mazin A. S. 
Abdelwahid", "Ahmed H. Arbab", "Adnan J. Al-Rehaily", "Mai M. Al-Oqail"], "year": "October 21, 2021", "journal": "ACS Omega", "doi": "https://doi.org/10.1021/acsomega.1c04320", "group": "height", "citations": 0}], "links": [{"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00250", "target": "https://doi.org/10.1021/acs.jcim.9b00249"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.0c01006"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.9b00557"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jctc.5b00834"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300399w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp9723574"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jp972358w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.0c00675"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jmedchem.7b01243"}, {"source": 
"https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm0306430"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm020406h"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/ci300493w"}, {"source": "https://doi.org/10.1021/acs.jcim.1c00203", "target": "https://doi.org/10.1021/jm049314d"}, {"source": "https://doi.org/10.1021/acsomega.1c04320", "target": "https://doi.org/10.1021/acs.jcim.1c00203"}, {"source": "https://doi.org/10.1021/acs.chemrev.1c00107", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jmedchem.0c01332", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00249", "target": "https://doi.org/10.1021/acs.jcim.9b00250"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/ci300604z"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.9b00557", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/ci700036j"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/ci500209e"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.8b00312", "target": "https://doi.org/10.1021/jm2005145"}, 
{"source": "https://doi.org/10.1021/ci500209e", "target": "https://doi.org/10.1021/jm2005145"}, {"source": "https://doi.org/10.1021/acs.jcim.0c00741", "target": "https://doi.org/10.1021/jm300687e"}, {"source": "https://doi.org/10.1021/acs.jcim.9b00778", "target": "https://doi.org/10.1021/acs.jcim.8b00312"}, {"source": "https://doi.org/10.1021/acs.jctc.0c01006", "target": "https://doi.org/10.1021/acs.jcim.9b00778"}]}
\ No newline at end of file
...@@ -22,6 +22,7 @@ from input.interface import InputInterface as Input ...@@ -22,6 +22,7 @@ from input.interface import InputInterface as Input
#import input #import input
from Processing import process_main from Processing import process_main
from import_from_json import input_from_json from import_from_json import input_from_json
from update_graph import check_graph_updates
# a function to print nodes and edges from a graph # a function to print nodes and edges from a graph
def print_graph(nodes, edges): def print_graph(nodes, edges):
...@@ -33,6 +34,23 @@ def print_graph(nodes, edges): ...@@ -33,6 +34,23 @@ def print_graph(nodes, edges):
print(edge,"\n") print(edge,"\n")
print(len(nodes)) print(len(nodes))
print(len(edges)) print(len(edges))
print(" ")
def print_extended_graph(nodes, edges):
    '''
    :param nodes: publications of the graph; each one provides title,
        doi_url, references and citations
    :type nodes: list

    :param edges: edges of the graph
    :type edges: list

    prints every node with the doi urls of its references and citations,
    then every edge, then the number of nodes and the number of edges
    '''
    print("Knoten:\n")
    for publication in nodes:
        print(publication.title, "\n")
        print(publication.doi_url)
        # doi urls of the references first, then those of the citations
        for referenced in publication.references:
            print(referenced.doi_url)
        for citing in publication.citations:
            print(citing.doi_url)

    print("\nKanten:\n")
    for link in edges:
        print(link, "\n")

    # sizes of both collections, nodes first
    for collection in (nodes, edges):
        print(len(collection))
    print(" ")
# program test with some random dois # program test with some random dois
...@@ -59,6 +77,26 @@ def try_known_publications(): ...@@ -59,6 +77,26 @@ def try_known_publications():
return(nodes, edges) return(nodes, edges)
def try_delete_nodes():
    '''
    manual test: builds a graph for two input publications, then reloads the
    stored graph from 'json_text.json' and updates it with an input list that
    only contains the first publication; prints the graph after each step
    '''
    initial_dois = [
        'https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249',
        'https://pubs.acs.org/doi/10.1021/acs.jcim.1c00203',
    ]
    nodes, edges = process_main(initial_dois, 1, 1)
    print_graph(nodes, edges)

    stored_nodes, stored_edges = input_from_json('json_text.json')
    # the second publication is left out to trigger the deletion path
    remaining_dois = ['https://pubs.acs.org/doi/10.1021/acs.jcim.9b00249']
    valid_nodes, valid_edges = check_graph_updates(remaining_dois, stored_nodes, stored_edges)
    print_graph(valid_nodes, valid_edges)
def try_import():
    '''
    manual test: reads the stored graph from 'json_text.json' and prints it
    together with the references and citations of every node
    '''
    imported_nodes, imported_edges = input_from_json('json_text.json')
    print_extended_graph(imported_nodes, imported_edges)
#nodes, edges = try_known_publications()
#nodes_new, edges_new = input_from_json("json_text.json") #nodes_new, edges_new = input_from_json("json_text.json")
#print_graph(nodes_new, edges_new) #print_graph(nodes_new, edges_new)
\ No newline at end of file try_delete_nodes()
#try_import()
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to read old json files to recreate old graph structure
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
from bs4 import BeautifulSoup as bs
import requests as req
import sys
import json
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
from input.interface import InputInterface as Input
#import input
from input_test import input_test_func
class Publication:
    '''
    node of the citation graph, holding the metadata of one publication
    '''
    def __init__(self, doi_url, title, contributors, journal, publication_date, group):
        '''
        :param doi_url: doi url of the publication
        :param title: title of the publication
        :param contributors: authors of the publication
        :param journal: journal the publication appeared in
        :param publication_date: date of publication
        :param group: group of the node in the graph, e.g. "input"
        '''
        self.doi_url = doi_url
        self.title = title
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
        # Every publication starts with its own empty lists, so code that
        # walks pub.references / pub.citations works before any edges have
        # been attached.
        self.references = []
        self.citations = []
        self.group = group
class Citation:
    '''
    metadata of a publication that is stored as a citation entry
    '''
    def __init__(
        self,
        doi_url,
        title,
        contributors,
        journal,
        publication_date,
    ):
        # identification
        self.doi_url = doi_url
        self.title = title
        # bibliographic details
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
class Reference:
    '''
    metadata of a publication that is stored as a reference entry
    '''
    def __init__(
        self,
        doi_url,
        title,
        contributors,
        journal,
        publication_date,
    ):
        # identification
        self.doi_url = doi_url
        self.title = title
        # bibliographic details
        self.contributors = contributors
        self.journal = journal
        self.publication_date = publication_date
#def cit(list_doi):
# cits = []
# for doi_url in list_doi:
# for array in list_of_arrays:
# if doi_url == array[0]:
# cits.append(Citation(array[0], array[1], array[2], array[3], array[4]))
# return cits
#def ref(list_doi):
# refs = []
# for doi_url in list_doi:
# for array in list_of_arrays:
# if doi_url == array[0]:
# refs.append(Citation(array[0], array[1], array[2], array[3], array[4]))
# return refs
def read_json(json_file='json_text.json'):
    '''
    :param json_file: path of the json file to read, defaults to the file
        name that was hard-coded before
    :type json_file: str

    reads the "nodes" of a stored graph, appends one Publication per entry to
    the module-level list nodes and prints the main fields of each one
    '''
    # explicit encoding so the result does not depend on the platform default
    with open(json_file, 'r', encoding='utf-8') as file:
        obj = json.load(file)

    for node in obj["nodes"]:
        pub = Publication(node["doi"], node["name"], node["author"], node["journal"], node["year"], node["group"])
        nodes.append(pub)
        print(pub.doi_url)
        print(pub.title)
        print(pub.journal)
        print(pub.group)
        print(" ")

    # TODO: the edges of the stored graph are not read yet
# module-level containers; read_json() appends to nodes
nodes, edges = [], []
read_json()
#print(type(obj))
#nodes = obj["nodes"]
#for node in nodes:
#print(node["doi"])
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Functions to update a graph representing citations between multiple ACS/Nature journals
"""
__authors__ = "Donna Löding, Alina Molkentin, Xinyi Tang, Judith Große, Malte Schokolowski"
__email__ = "cis-project2021@zbh.uni-hamburg.de"
__status__ = "Production"
#__copyright__ = ""
#__credits__ = ["", "", "", ""]
#__license__ = ""
#__version__ = ""
#__maintainer__ = ""
import sys
from pathlib import Path
#sys.path.insert(1, 'C:\Users\Malte\Git\CiS-Projekt\ci-s-projekt-verarbeitung\input')
sys.path.append(".")
from input.interface import InputInterface as Input
from input_test import input_test_func
#import input
from Knoten_Vergleich import doi_listen_vergleichen
from Kanten_Vergleich import back_to_valid_edges
def get_old_input_dois(old_obj_input_list):
    '''
    :param old_obj_input_list: publications of the old graph
    :type old_obj_input_list: list

    :return: doi urls of all publications whose group is "input", in the
        order in which they appear in the list
    :rtype: list
    '''
    return [
        publication.doi_url
        for publication in old_obj_input_list
        if publication.group == "input"
    ]
def get_new_input_dois(new_input, test_var):
    '''
    :param new_input: entries of the new input list
    :type new_input: list

    :param test_var: if True the publications come from input_test_func,
        otherwise they are fetched with Input().get_publication
    :type test_var: boolean

    :return: doi_url of every publication that could be obtained
    :rtype: list
    '''
    resolved_dois = []
    for candidate in new_input:
        if test_var:
            publication = input_test_func(candidate)
        else:
            fetcher = Input()
            try:
                publication = fetcher.get_publication(candidate)
            except (ValueError, IndexError):
                # entries that cannot be fetched are left out
                continue
        resolved_dois.append(publication.doi_url)
    return resolved_dois
def delete_ref_nodes_rec(pub, _visited=None):
    '''
    :param pub: publication whose references are removed from the graph
    :type pub: Class Publication

    :param _visited: ids of the publications already handled by this
        traversal, only passed on by the recursive calls
    :type _visited: set

    follows the references of pub through the module-level list
    processed_input_list and removes every publication reached that way;
    pub itself is removed as well unless its group is "input"
    '''
    if _visited is None:
        _visited = set()
    # Two publications that reference each other would otherwise send the
    # recursion back and forth until the recursion limit is hit.
    if id(pub) in _visited:
        return
    _visited.add(id(pub))

    reference_dois = {reference.doi_url for reference in pub.references}
    # iterate over a snapshot because the recursive calls shrink the list
    for ref_pub in list(processed_input_list):
        if ref_pub.doi_url in reference_dois:
            delete_ref_nodes_rec(ref_pub, _visited)

    # the publication may already have been removed further down
    if pub.group != "input" and pub in processed_input_list:
        processed_input_list.remove(pub)
def delete_cit_nodes_rec(pub, _visited=None):
    '''
    :param pub: publication whose citations are removed from the graph
    :type pub: Class Publication

    :param _visited: ids of the publications already handled by this
        traversal, only passed on by the recursive calls
    :type _visited: set

    follows the citations of pub through the module-level list
    processed_input_list and removes every publication reached that way;
    pub itself is removed as well unless its group is "input"
    '''
    if _visited is None:
        _visited = set()
    # Two publications that cite each other would otherwise send the
    # recursion back and forth until the recursion limit is hit.
    if id(pub) in _visited:
        return
    _visited.add(id(pub))

    citation_dois = {citation.doi_url for citation in pub.citations}
    # iterate over a snapshot because the recursive calls shrink the list
    for cit_pub in list(processed_input_list):
        if cit_pub.doi_url in citation_dois:
            delete_cit_nodes_rec(cit_pub, _visited)

    # the publication may already have been removed further down
    if pub.group != "input" and pub in processed_input_list:
        processed_input_list.remove(pub)
def check_graph_updates(new_doi_input_list, old_obj_input_list, old_edges_list, test_var = False):
    '''
    :param new_doi_input_list: new input list
    :type new_doi_input_list: list

    :param old_obj_input_list: publications of the old graph; this list is
        modified in place
    :type old_obj_input_list: list

    :param old_edges_list: edges of the old graph
    :type old_edges_list: list

    :param test_var: passed on to get_new_input_dois to choose between test
        and url call
    :type test_var: boolean

    :return: the remaining publications and the edges that are still valid
    :rtype: tuple

    compares the old and the new input dois; if no publication was inserted,
    every deleted input publication is removed together with the publications
    reached through its references and citations, and the edges are filtered
    accordingly. If publications were inserted nothing is deleted and the
    returned edge list is empty.
    '''
    # delete_ref_nodes_rec and delete_cit_nodes_rec work on this global list
    global processed_input_list, valid_edges
    processed_input_list = old_obj_input_list
    valid_edges = []

    old_doi_input_list = get_old_input_dois(old_obj_input_list)
    new_doi_input_list = get_new_input_dois(new_doi_input_list, test_var)

    common_nodes, inserted_nodes, deleted_nodes = doi_listen_vergleichen(old_doi_input_list, new_doi_input_list)

    if not inserted_nodes:
        for del_node in deleted_nodes:
            # Iterate over a snapshot: the list shrinks during the loop, and
            # iterating the live list would skip publications.
            for pub in list(processed_input_list):
                if del_node != pub.doi_url or pub not in processed_input_list:
                    continue
                delete_ref_nodes_rec(pub)
                delete_cit_nodes_rec(pub)
                if pub in processed_input_list:
                    processed_input_list.remove(pub)
        valid_edges = back_to_valid_edges(old_edges_list, processed_input_list)

    return(processed_input_list, valid_edges)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment