Skip to content
Snippets Groups Projects
Commit 026eda27 authored by Mia_Le's avatar Mia_Le
Browse files

Cleaned up the code. Handled saving of the output files.

parent 33aaf45c
No related branches found
No related tags found
No related merge requests found
from AlgorithmWrapper import AlgorithmWrapper
from graph_tool import Graph
import subprocess, os, preprocess
import tempfile
import subprocess, os
class DiamondWrapper(AlgorithmWrapper):
def __init__(self):
super().__init__()
self.name = 'DIAMOnD'
self.code = 1
def run_algorithm(self, inputparams):
"""Run DIAMOnD algorithm
......
......@@ -7,6 +7,7 @@ class DominoWrapper(AlgorithmWrapper):
def __init__(self):
super().__init__()
self.name = 'DOMINO'
self.code = 2
def run_algorithm(self, inputparams):
"""Run Domino algorithm
......
......@@ -16,6 +16,7 @@ class RobustWrapper(AlgorithmWrapper):
def __init__(self):
super().__init__()
self.name = 'ROBUST'
self.code = 3
def run_algorithm(self, inputparams):
# -----------------------------------------------------
......
......@@ -38,9 +38,10 @@ class cami():
self.output_dir = output_dir
self.tmp_dir = tmp_dir
self.uid = uid
self.visualize = False
self.nof_tools = len(tool_wrappers)
self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices)
self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers}
self.code2toolname[0] = 'CAMI'
def run_tool(self, tool):
tool.set_id(self.uid)
# TODO: Rethink placement of creation of the temporary directory?
......@@ -49,7 +50,7 @@ class cami():
inputparams = tool.prepare_input()
print(f'running {tool.name}...')
preds = set(tool.run_algorithm(inputparams))
print(f'{tool.name} predicted {len(preds)} active genes (seed genes not excluded):')
print(f'{tool.name} predicted {len(preds)} active vertices (seeds not excluded):')
print(preds)
return preds
......@@ -57,13 +58,13 @@ class cami():
print('Evaluation not implemented yet.')
def create_consensus(self):
pred_sets = {}
print(f'creating result sets of all {self.nof_tools} tools...')
pred_sets = {}
for tool in self.tool_wrappers:
preds = self.run_tool(tool)
pred_sets[tool] = preds #- seed_set
print(pred_sets)
assert self.nof_tools == len(pred_sets), 'Number of used tools does not match with number of result sets'
......@@ -72,85 +73,119 @@ class cami():
# calculate gene weights
# set of all result genes
gene_weights = self.ppi_graph.vertex_properties["weight"]
cami_scores = self.ppi_graph.vertex_properties["cami_score"]
predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
cami_vertices = set()
putative_vertices = set()
consens_threshold = min(self.nof_tools, 2)
for tool in result_sets:
result_sets[tool] -= set(self.seed_lst)
# TODO: Should we keep the seeds in the result sets?
# every time a tool predicts a gene, add 1 * the tool weight to its weight and add it to the result genes
for vertex in result_sets[tool]:
gene_weights[vertex] += 1.0 * tool.weight
predicted_by[vertex][tool.code] = 1 #TODO: tool or tool.name?
cami_scores[vertex] += 1.0 * tool.weight
putative_vertices.add(vertex)
if gene_weights[vertex] > 2: # if a vertex was predicted twice add it to the cami set
if cami_scores[vertex] >= consens_threshold: # if a vertex was predicted twice (or once if there is only 1 tool used) add it to the cami set
putative_vertices.remove(vertex)
cami_vertices.add(vertex)
predicted_by[vertex][0] = 1
# TODO: add neighbors of the result genes if gene weight == 1
# TODO: calculate CAMI scores
# TODO: Try to rerun cami with varying input seeds
# add a putative gene to the cami set when it is in the neighborhood of the existing cami genes
for vertex in cami_vertices.copy():
# add a putative gene to the cami set when it is in the neighborhood of the existing cami genes or the seed genes
heavy_vertices = cami_vertices.copy()
for seed in self.seed_lst:
heavy_vertices.add(seed)
for vertex in heavy_vertices:
neighbors = vertex.all_neighbors()
for vertex in putative_vertices:
if vertex in neighbors:
cami_vertices.add(vertex)
gene_weights[vertex] += 0.5
predicted_by[vertex][0] = 1
cami_scores[vertex] += 0.5
# sort the resulting vertices according to their cami_score
cami_vlist = sorted(cami_vertices, key=lambda v:cami_scores[v], reverse=True)
cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vlist]
seed_genes = [self.ppi_vertex2gene[seed_vertex] for seed_vertex in self.seed_lst]
print(f'With the given seed genes: {seed_genes} CAMI proposes the following genes to add to the Active Module (sorted by CAMI Score): {cami_genes}')
cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vertices]
cami_genes = sorted(cami_genes, key=lambda v:cami_genes[v])
# save the results in outputfiles
name = self.ppi_vertex2gene
with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\n')
all_vertices = cami_vertices.union(putative_vertices)
for vertex in all_vertices:
outputfile.write(f'{name[vertex]}\t{[self.code2toolname[idx] for idx,code in enumerate(predicted_by[vertex]) if code == 1]}\t{cami_scores[vertex]}\t{str(vertex)}\n')
print(f'With the given seed genes: {self.seed_lst} CAMI proposes the following genes to add to the Active Module (sorted by CAMI Score): {cami_result}')
with open(f'{self.output_dir}/CAMI_{self.uid}_output.tsv', 'w') as outputfile:
outputfile.write('gene\tindex_in_graph\tcami_score\n')
for vertex in cami_vlist:
outputfile.write(f'{name[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\n')
#make intersections
intersection_set = {}
# for visualization
# transform the vertex indices to their corresponding gene names
self.result_gene_sets['CAMI'] = cami_genes
for tool in result_sets:
intersection_set[tool.name] = result_sets[tool]
intersection_set['Seeds'] = set(self.seed_lst)
tools = [tool for tool in intersection_set]
combis = list_combinations([True,False], len(tools))
result_table = {}
for combi in combis:
key = ''
value = set()
for toolid,choice in enumerate(combi):
if choice:
tool = tools[toolid]
key += tool + '_'
if len(value) == 0:
value = intersection_set[tool]
else:
value = value.intersection(intersection_set[tool])
if len(key) > 0 and len(value) > 0:
result_table[key[:-1]] = value
# save intersections in tabular format
intersectionsfilename = f'cami_intersections_{self.uid}_output.txt'
intersectionsfile = f'{self.output_dir}/{intersectionsfilename}'
with open(intersectionsfile, 'w') as file:
file.write('Result sets and intersections (combined by _) of all used tools and the used seed genes.\n')
file.write('All seed genes were removed from the result sets of the tools\n')
for group in result_table:
file.write(f'{group}: {result_table[group]}\n')
print(f'Intersections and result sets of all tools were saved to {intersectionsfile}')
def visualize(self, degradome_sets):
self.result_gene_sets[tool.name] = set([name[vertex] for vertex in result_sets[tool]])
def visualize(self):
# visualize with degradome
if self.nof_tools < 5:
print('Visualizing results using Degradome...')
print(f'creating a separate set for the seeds...')
# degradome_sets['CAMI'] = set(result_genes)
degradome_sets['Seeds'] = set(self.seed_lst)
url = degradome.send_request(degradome_sets)
webbrowser.open(url)
elif nof_tools == 6:
print('Visualizing using Degradome...(seeds excluded from results)')
# degradome_sets = result_sets.copy()
if self.nof_tools < 7:
# print('Visualizing results using Degradome...')
# print(f'creating a separate set for the seeds...')
# degradome_sets['CAMI'] = set(result_genes)
url = degradome.send_request(degradome_sets)
# degradome_sets['Seeds'] = set(self.seed_lst)
url = degradome.send_request(self.result_gene_sets)
webbrowser.open(url)
# elif nof_tools == 6:
# print('Visualizing using Degradome...(seeds excluded from results)')
# # degradome_sets = result_sets.copy()
# # degradome_sets['CAMI'] = set(result_genes)
# url = degradome.send_request(degradome_sets)
# webbrowser.open(url)
else:
print('Cannot use degradome to create venn diagrams of 6 or more tools')
# for node in union:
# do we even need the intersections?
# def intersect(self)
# #make intersections
# intersection_set = {}
# for tool in result_sets:
# intersection_set[tool.name] = result_sets[tool]
# intersection_set['Seeds'] = set(self.seed_lst)
# tools = [tool for tool in intersection_set]
# combis = list_combinations([True,False], len(tools))
# result_table = {}
# for combi in combis:
# key = ''
# value = set()
# for toolid,choice in enumerate(combi):
# if choice:
# tool = tools[toolid]
# key += tool + '_'
# if len(value) == 0:
# value = intersection_set[tool]
# else:
# value = value.intersection(intersection_set[tool])
# if len(key) > 0 and len(value) > 0:
# result_table[key[:-1]] = value
# # save intersections in tabular format
# intersectionsfilename = f'cami_intersections_{self.uid}_output.txt'
# intersectionsfile = f'{self.output_dir}/{intersectionsfilename}'
# with open(intersectionsfile, 'w') as file:
# file.write('Result sets and intersections (combined by _) of all used tools and the used seed genes.\n')
# file.write('All seed genes were removed from the result sets of the tools\n')
# for group in result_table:
# file.write(f'{group}: {result_table[group]}\n')
# print(f'Intersections and result sets of all tools were saved to {intersectionsfile}')
......@@ -13,7 +13,8 @@ def csv2graph(inputfile, delimiter="\t"):
"""
g = graph_tool.load_graph_from_csv(inputfile, skip_first=True,
csv_options={'delimiter': '\t', 'quotechar': '"'})
g.vertex_properties["weight"] = g.new_vertex_property("float", val=0.0)
g.vertex_properties["cami_score"] = g.new_vertex_property("float", val=0.0)
g.vertex_properties["predicted_by"] = g.new_vertex_property("vector<int16_t>", val=[-1,-1,-1,-1,-1])
return g
def txt2lst(seed_file):
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment