From 095fce3eac50c52367531c8838e7d1fb6f77fe9c Mon Sep 17 00:00:00 2001
From: Mia_Le <64813807+mlmial@users.noreply.github.com>
Date: Sun, 13 Feb 2022 02:31:46 +0100
Subject: [PATCH] automatisches herausziehen der summaries zu den Genen
 implementiert.

---
 cami/cami.py       |  7 +++++--
 cami/cami_suite.py | 19 ++++++++++++++++---
 cami/ncbi.py       | 11 +++++++++++
 3 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 cami/ncbi.py

diff --git a/cami/cami.py b/cami/cami.py
index 49ca2c4..d321f09 100755
--- a/cami/cami.py
+++ b/cami/cami.py
@@ -14,7 +14,7 @@ import webbrowser
 import subprocess
 
 def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
-         output_dir, identifier, save_temps, visualize, save_image, force, drugstone):
+         output_dir, identifier, save_temps, visualize, save_image, force, drugstone, ncbi):
     print('CAMI started')
     
     nof_tools = len(tools)
@@ -97,6 +97,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
         tool.set_weight(float(tool_weights[idx]))
 
     cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path)
+    if ncbi:
+        cami.ncbi = True
     for tool in tool_wrappers:
         cami.initialize_tool(tool)
 
@@ -161,7 +163,8 @@ if __name__ == "__main__":
     parser.add_argument('-f', '--force', action='store_true', help="Ignore warnings and overwrite everything when excecuting CAMI.")
     parser.add_argument('-d', '--drugstone', nargs='*', action='store', default=None,
                         help="Visualize the cami module via the drugstone API. If necessary the user needs to provide a list of the two titles of the two columns that contain the symbols of the gene edges in the inputfile of the PPI network. The order needs to correspond to the order of the first two columns. The default is 'Official_Symbol_Interactor_A Official_Symbol_Interactor_B'. Please note that the symbol columns cannot be the first two columns. If the two main edges in the first two columns are correspond also the gene symbols please duplicate these columns.")
-    
+    parser.add_argument('-ncbi', '--ncbi', action='store_true', default=False,
+                        help="Save the NCBI URLs and Summaries of the genes in the CAMI output.")
     #TODO List with additional arguments if needed by certain tools
 
     args = vars(parser.parse_args())
diff --git a/cami/cami_suite.py b/cami/cami_suite.py
index fe82c04..6ec98e1 100644
--- a/cami/cami_suite.py
+++ b/cami/cami_suite.py
@@ -1,4 +1,4 @@
-import degradome, drugstone
+import degradome, drugstone, ncbi
 
 def list_combinations(lst, k):
     """creates all possible combinations of length k with two objects in a list
@@ -64,6 +64,7 @@ class cami():
         self.code2toolname[0] = 'CAMI'
         self.home_path = home_path
         self.cami_vertices = []
+        self.ncbi = False
 
     def set_initial_seed_lst(self, seedlst):
         self.initial_seed_lst = seedlst
@@ -197,6 +198,7 @@ class cami():
         # dictionary to translate a vertex to its gene name
         
         # save all predictions by all tools
+        print('Saving the results...')
         with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
             outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+
                              f'initially: {self.initial_seed_lst}\n')
@@ -207,11 +209,22 @@ class cami():
         print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
 
         # save the predictions made by cami
+        ncbi_url = ('\tncbi_url' if self.ncbi else '')
+        ncbi_summary = ('\tncbi_summary' if self.ncbi else '')
+
         with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile:
-            outputfile.write('gene\tindex_in_graph\tcami_score\tdegree_in_graph\n')     
+            outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n')     
             for vertex in cami_vlist:
-                outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}\n')
+                if ncbi:
+                    url, summary = ncbi.send_request(name[vertex])
+                    url = '\t' + url
+                    if summary is not None:
+                        summary = '\t' + summary
+                else:
+                    url, summary = '',''
+                outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
         
+        # save the whole module
         with open(f'{self.output_dir}/CAMI_module_{self.uid}.txt', 'w') as modfile:
                 for vertex in seed_genes:
                     modfile.write(f'{vertex}\n')
diff --git a/cami/ncbi.py b/cami/ncbi.py
new file mode 100644
index 0000000..9b404d7
--- /dev/null
+++ b/cami/ncbi.py
@@ -0,0 +1,11 @@
+import requests,re
+
+def send_request(gen_id, timeout=30):
+    """Fetch gen_id's NCBI gene page; return (url, summary), summary None if not found."""
+    url = f"https://www.ncbi.nlm.nih.gov/gene/{gen_id}"
+    # requests applies no timeout by default, so a stalled server would block forever
+    r = requests.get(url, timeout=timeout)
+    summary_match = re.search(r'(<dt>Summary</dt>\n)(.*)(<dd>)(.*)(</dd>)', r.text)
+    if summary_match:
+        return url, summary_match.group(4)
+    return url, None
-- 
GitLab