diff --git a/cami/cami.py b/cami/cami.py
index 49ca2c44dfb8ce0a00bcbe6cbc37d07826a32e3a..d321f09c4cb10538c7a5d0829c44d8bd08ae413f 100755
--- a/cami/cami.py
+++ b/cami/cami.py
@@ -14,7 +14,7 @@ import webbrowser
 import subprocess
 
 def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
-         output_dir, identifier, save_temps, visualize, save_image, force, drugstone):
+         output_dir, identifier, save_temps, visualize, save_image, force, drugstone, ncbi):
     print('CAMI started')
     nof_tools = len(tools)
 
@@ -97,6 +97,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
             tool.set_weight(float(tool_weights[idx]))
 
     cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path)
+    if ncbi:
+        cami.ncbi = True
 
     for tool in tool_wrappers:
         cami.initialize_tool(tool)
@@ -161,7 +163,8 @@ if __name__ == "__main__":
     parser.add_argument('-f', '--force', action='store_true',
             help="Ignore warnings and overwrite everything when excecuting CAMI.")
     parser.add_argument('-d', '--drugstone', nargs='*', action='store', default=None, help="Visualize the cami module via the drugstone API. If necessary the user needs to provide a list of the two titles of the two columns that contain the symbols of the gene edges in the inputfile of the PPI network. The order needs to correspond to the order of the first two columns. The default is 'Official_Symbol_Interactor_A Official_Symbol_Interactor_B'. Please note that the symbol columns cannot be the first two columns. If the two main edges in the first two columns are correspond also the gene symbols please duplicate these columns.")
-    
+    parser.add_argument('-ncbi', '--ncbi', action='store_true', default=False,
+            help="Save the NCBI URLs and Summaries of the genes in the CAMI output.")
     #TODO List with additional arguments if needed by certain tools
     args = vars(parser.parse_args())
 
diff --git a/cami/cami_suite.py b/cami/cami_suite.py
index fe82c049cfa2db88cfe1e8138f54543448a76863..6ec98e11a06e43baa620daca120b7b48195cf8d6 100644
--- a/cami/cami_suite.py
+++ b/cami/cami_suite.py
@@ -1,4 +1,4 @@
-import degradome, drugstone
+import degradome, drugstone, ncbi
 
 def list_combinations(lst, k):
     """creates all possible combinations of length k with two objects in a list
@@ -64,6 +64,7 @@ class cami():
         self.code2toolname[0] = 'CAMI'
         self.home_path = home_path
         self.cami_vertices = []
+        self.ncbi = False
 
     def set_initial_seed_lst(self, seedlst):
         self.initial_seed_lst = seedlst
@@ -197,6 +198,7 @@ class cami():
         # dictionary to translate a vertex to its gene name
 
         # save all predictions by all tools
+        print('Saving the results...')
         with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
             outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+
                              f'initially: {self.initial_seed_lst}\n')
@@ -207,11 +209,23 @@ class cami():
         print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
 
         # save the predictions made by cami
+        ncbi_url = ('\tncbi_url' if self.ncbi else '')
+        ncbi_summary = ('\tncbi_summary' if self.ncbi else '')
+
         with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile:
-            outputfile.write('gene\tindex_in_graph\tcami_score\tdegree_in_graph\n')
+            outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n')
             for vertex in cami_vlist:
-                outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}\n')
+                # test the instance flag; the bare name `ncbi` is the imported module
+                if self.ncbi:
+                    url, summary = ncbi.send_request(name[vertex])
+                    url = '\t' + url
+                    # always emit the summary cell so every row matches the header
+                    summary = '\t' + (summary or '')
+                else:
+                    url, summary = '', ''
+                outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
 
+        # save the whole module
         with open(f'{self.output_dir}/CAMI_module_{self.uid}.txt', 'w') as modfile:
             for vertex in seed_genes:
                 modfile.write(f'{vertex}\n')
diff --git a/cami/ncbi.py b/cami/ncbi.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b404d77b9f1331c912835c82a013cf4ed2b9290
--- /dev/null
+++ b/cami/ncbi.py
@@ -0,0 +1,27 @@
+import re
+
+import requests
+
+
+def send_request(gen_id):
+    """Fetch the NCBI gene page for a gene and extract its summary text.
+
+    :param gen_id: identifier that is appended to the NCBI gene URL
+    :return: tuple ``(url, summary)``; ``summary`` is ``None`` when the page
+             could not be fetched or contains no summary entry
+    """
+    url = f"https://www.ncbi.nlm.nih.gov/gene/{gen_id}"
+    try:
+        # a timeout keeps one stalled request from blocking the whole export
+        r = requests.get(url, timeout=30)
+    except requests.RequestException:
+        # best effort: the URL is still useful without a summary
+        return url, None
+    response = r.text
+    # the summary text is the content of the <dd> element that follows
+    # the "<dt>Summary</dt>" line
+    summary_match = re.search(r'(<dt>Summary</dt>\n)(.*)(<dd>)(.*)(</dd>)', response)
+    if summary_match:
+        summary = summary_match.group(4)
+        return url, summary
+    return url, None