From 095fce3eac50c52367531c8838e7d1fb6f77fe9c Mon Sep 17 00:00:00 2001 From: Mia_Le <64813807+mlmial@users.noreply.github.com> Date: Sun, 13 Feb 2022 02:31:46 +0100 Subject: [PATCH] automatisches herausziehen der summaries zu den Genen implementiert. --- cami/cami.py | 7 +++++-- cami/cami_suite.py | 19 ++++++++++++++++--- cami/ncbi.py | 11 +++++++++++ 3 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 cami/ncbi.py diff --git a/cami/cami.py b/cami/cami.py index 49ca2c4..d321f09 100755 --- a/cami/cami.py +++ b/cami/cami.py @@ -14,7 +14,7 @@ import webbrowser import subprocess def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, - output_dir, identifier, save_temps, visualize, save_image, force, drugstone): + output_dir, identifier, save_temps, visualize, save_image, force, drugstone, ncbi): print('CAMI started') nof_tools = len(tools) @@ -97,6 +97,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, tool.set_weight(float(tool_weights[idx])) cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path) + if ncbi: + cami.ncbi = True for tool in tool_wrappers: cami.initialize_tool(tool) @@ -161,7 +163,8 @@ if __name__ == "__main__": parser.add_argument('-f', '--force', action='store_true', help="Ignore warnings and overwrite everything when excecuting CAMI.") parser.add_argument('-d', '--drugstone', nargs='*', action='store', default=None, help="Visualize the cami module via the drugstone API. If necessary the user needs to provide a list of the two titles of the two columns that contain the symbols of the gene edges in the inputfile of the PPI network. The order needs to correspond to the order of the first two columns. The default is 'Official_Symbol_Interactor_A Official_Symbol_Interactor_B'. Please note that the symbol columns cannot be the first two columns. 
If the two main edges in the first two columns are correspond also the gene symbols please duplicate these columns.") - + parser.add_argument('-ncbi', '--ncbi', action='store_true', default=False, + help="Save the NCBI URLs and Summaries of the genes in the CAMI output.") #TODO List with additional arguments if needed by certain tools args = vars(parser.parse_args()) diff --git a/cami/cami_suite.py b/cami/cami_suite.py index fe82c04..6ec98e1 100644 --- a/cami/cami_suite.py +++ b/cami/cami_suite.py @@ -1,4 +1,4 @@ -import degradome, drugstone +import degradome, drugstone, ncbi def list_combinations(lst, k): """creates all possible combinations of length k with two objects in a list @@ -64,6 +64,7 @@ class cami(): self.code2toolname[0] = 'CAMI' self.home_path = home_path self.cami_vertices = [] + self.ncbi = False def set_initial_seed_lst(self, seedlst): self.initial_seed_lst = seedlst @@ -197,6 +198,7 @@ class cami(): # dictionary to translate a vertex to its gene name # save all predictions by all tools + print('Saving the results...') with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile: outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+ f'initially: {self.initial_seed_lst}\n') @@ -207,11 +209,22 @@ class cami(): print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv') # save the predictions made by cami + ncbi_url = ('\tncbi_url' if self.ncbi else '') + ncbi_summary = ('\tncbi_summary' if self.ncbi else '') + with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile: - outputfile.write('gene\tindex_in_graph\tcami_score\tdegree_in_graph\n') + outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n') for vertex in cami_vlist: - outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}\n') + if self.ncbi: + url, summary = 
ncbi.send_request(name[vertex]) + url = '\t' + url + if summary is not None: + summary = '\t' + summary + else: + url, summary = '','' + outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n') + # save the whole module with open(f'{self.output_dir}/CAMI_module_{self.uid}.txt', 'w') as modfile: for vertex in seed_genes: modfile.write(f'{vertex}\n') diff --git a/cami/ncbi.py b/cami/ncbi.py new file mode 100644 index 0000000..9b404d7 --- /dev/null +++ b/cami/ncbi.py @@ -0,0 +1,11 @@ +import requests,re + +def send_request(gen_id): + url = f"https://www.ncbi.nlm.nih.gov/gene/{gen_id}" + r = requests.get(url, timeout=10) + response = r.text + summary_match = re.search(r'(<dt>Summary</dt>\n)(.*)(<dd>)(.*)(</dd>)', response) + if summary_match: + summary = summary_match.group(4) + return url, summary + return url, None -- GitLab