Skip to content
Snippets Groups Projects
Commit 095fce3e authored by Mia_Le's avatar Mia_Le
Browse files

automatisches herausziehen der summaries zu den Genen implementiert.

parent 1221aeab
No related branches found
No related tags found
No related merge requests found
......@@ -14,7 +14,7 @@ import webbrowser
import subprocess
def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
output_dir, identifier, save_temps, visualize, save_image, force, drugstone):
output_dir, identifier, save_temps, visualize, save_image, force, drugstone, ncbi):
print('CAMI started')
nof_tools = len(tools)
......@@ -97,6 +97,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
tool.set_weight(float(tool_weights[idx]))
cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path)
if ncbi:
cami.ncbi = True
for tool in tool_wrappers:
cami.initialize_tool(tool)
......@@ -161,7 +163,8 @@ if __name__ == "__main__":
parser.add_argument('-f', '--force', action='store_true', help="Ignore warnings and overwrite everything when excecuting CAMI.")
parser.add_argument('-d', '--drugstone', nargs='*', action='store', default=None,
help="Visualize the cami module via the drugstone API. If necessary the user needs to provide a list of the two titles of the two columns that contain the symbols of the gene edges in the inputfile of the PPI network. The order needs to correspond to the order of the first two columns. The default is 'Official_Symbol_Interactor_A Official_Symbol_Interactor_B'. Please note that the symbol columns cannot be the first two columns. If the two main edges in the first two columns are correspond also the gene symbols please duplicate these columns.")
parser.add_argument('-ncbi', '--ncbi', action='store_true', default=False,
help="Save the NCBI URLs and Summaries of the genes in the CAMI output.")
#TODO List with additional arguments if needed by certain tools
args = vars(parser.parse_args())
......
import degradome, drugstone
import degradome, drugstone, ncbi
def list_combinations(lst, k):
"""creates all possible combinations of length k with two objects in a list
......@@ -64,6 +64,7 @@ class cami():
self.code2toolname[0] = 'CAMI'
self.home_path = home_path
self.cami_vertices = []
self.ncbi = False
def set_initial_seed_lst(self, seedlst):
self.initial_seed_lst = seedlst
......@@ -197,6 +198,7 @@ class cami():
# dictionary to translate a vertex to its gene name
# save all predictions by all tools
print('Saving the results...')
with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+
f'initially: {self.initial_seed_lst}\n')
......@@ -207,11 +209,22 @@ class cami():
print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
# save the predictions made by cami
ncbi_url = ('\tncbi_url' if self.ncbi else '')
ncbi_summary = ('\tncbi_summary' if self.ncbi else '')
with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile:
outputfile.write('gene\tindex_in_graph\tcami_score\tdegree_in_graph\n')
outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n')
for vertex in cami_vlist:
outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}\n')
if ncbi:
url, summary = ncbi.send_request(name[vertex])
url = '\t' + url
if summary is not None:
summary = '\t' + summary
else:
url, summary = '',''
outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
# save the whole module
with open(f'{self.output_dir}/CAMI_module_{self.uid}.txt', 'w') as modfile:
for vertex in seed_genes:
modfile.write(f'{vertex}\n')
......
import requests,re
def send_request(gen_id, timeout=10):
    """Fetch the NCBI gene page for ``gen_id`` and extract its summary text.

    Args:
        gen_id: Identifier appended to ``https://www.ncbi.nlm.nih.gov/gene/``.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``(url, summary)`` tuple. ``url`` is the address that was requested.
        ``summary`` is the content of the ``<dd>`` element on the line after
        ``<dt>Summary</dt>``, or ``None`` when the server answers with an
        HTTP error status or the page has no such entry.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = f"https://www.ncbi.nlm.nih.gov/gene/{gen_id}"
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would block the caller forever.
    r = requests.get(url, timeout=timeout)
    if not r.ok:
        # Error pages (4xx/5xx) carry no gene summary; do not try to parse them.
        return url, None
    # Non-greedy groups stop at the first <dd>...</dd> pair after the Summary
    # label, so further </dd> tags on the same line are not swallowed.
    summary_match = re.search(r'<dt>Summary</dt>\n.*?<dd>(.*?)</dd>', r.text)
    if summary_match is None:
        return url, None
    return url, summary_match.group(1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment