Skip to content
Snippets Groups Projects
Commit 0c89a26d authored by Le, Mia's avatar Le, Mia
Browse files

fixed usage of predicted_by vertex_property

parent c0adfa23
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,7 @@ class AlgorithmWrapper(object):
self.home_path = ''
self.config = 'camiconf'
self.code = 99
self.debug = False
def set_weight(self, weight):
self.weight = weight
......
......@@ -63,6 +63,7 @@ class DiamondWrapper(AlgorithmWrapper):
file.write(f"{str(edge.source())},{str(edge.target())}\n")
inputparams.append(ppi_file)
assert os.path.exists(ppi_file), f'Could not create PPI-network file "{ppi_file}"'
if self.debug:
print(f'{self.name} ppi is saved in {ppi_file}')
# create seed file
......@@ -71,6 +72,7 @@ class DiamondWrapper(AlgorithmWrapper):
for seed in self.seeds:
file.write(f"{seed}\n")
assert os.path.exists(seed_file), f'Could not create seed file "{seed_file}"'
if self.debug:
print(f'{self.name} seeds are saved in {seed_file}')
inputparams.append(seed_file)
......@@ -79,6 +81,7 @@ class DiamondWrapper(AlgorithmWrapper):
#MC:
#CONFIG pred_factor = 10, max_preds = 100
nof_preds = min([nof_seeds * self.pred_factor, self.max_preds])
if self.debug:
print(f'With {nof_seeds} seeds, {self.name} will try to predict {nof_preds} active modules.')
inputparams.append(nof_preds)
return inputparams
......
......@@ -64,6 +64,7 @@ class DominoWrapper(AlgorithmWrapper):
"""
inputparams = []
# prepare inputfiles
if self.debug:
print(f'creating {self.name} input files in {self.output_dir}')
ppi_filename = self.name_file('ppi', 'sif')
......@@ -87,15 +88,18 @@ class DominoWrapper(AlgorithmWrapper):
for seed in self.seeds:
file.write(f"{seed}_\n")
inputparams.append(seed_file)
if self.debug:
print(f'{self.name} seeds are saved in {seed_file}')
slices_filename = self.name_file('slices')
slices_output = os.path.join(self.output_dir, slices_filename)
if not os.path.exists(slices_output):
if self.debug:
print('creating domino slices_file...')
command = f'slicer --network_file "{ppi_file}" --output_file "{slices_output}"'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
if self.debug:
print(f'{self.name} slices are saved in {slices_output}')
inputparams.append(slices_output)
return inputparams
......
......@@ -82,11 +82,13 @@ class RobustWrapper(AlgorithmWrapper):
for edge in self.ppi_network.edges():
file.write(f"{str(edge.source())}\t{str(edge.target())}\n")
inputparams.append(ppi_file)
if self.debug:
print(f'{self.name} ppi is saved in {ppi_file}')
with open(seed_file, "w") as file:
for seed in self.seeds:
file.write(f"{seed}\n")
if self.debug:
print(f'{self.name} seeds are saved in {seed_file}')
inputparams.append(seed_file)
......
......@@ -136,8 +136,9 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
result_sets = cami.take_custom_results(external_input, result_sets)
cami.create_consensus(result_sets)
for result in result_sets.keys():
result_sets[result] = result_sets[result].union(cami.seed_lst)
# adds the seeds to the results, right now result_sets contains the seeds that should be ADDED to the module
# for result in result_sets.keys():
# result_sets[result] = result_sets[result].union(cami.seed_lst)
if nvenn or save_image:
print('Sending results to nVenn')
......@@ -169,11 +170,6 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
if nvenn and vis:
url = cami.nvenn()
cami.download_diagram(url)
with open('/Users/Mia/cami_local/cami/data/output/explorativeness.tsv', 'a') as f:
make_consensus(vis=True)
seedname = seeds
for tool in cami.result_gene_sets:
f.write(f'\n{seedname}\t{len(cami.seed_lst)}\t{tool}\t{len(cami.result_gene_sets[tool])}')
with open(f'{output_dir}/00_node_degrees.tsv', 'w') as node_degrees:
node_degrees.write('vertex\tout_degree\tin_degree\n')
......@@ -184,7 +180,15 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
base_seeds = cami.origin_seed_lst
original_seeds = [cami.ppi_vertex2gene[seed] for seed in base_seeds]
print(f'Initializing CAMI and the seed variation by running CAMI with all given seeds:{original_seeds}')
with open('/Users/Mia/cami_local/cami/data/output/explorativeness.tsv', 'a') as f:
make_consensus(vis=True)
seedname = seeds
for tool in cami.result_gene_sets:
f.write(f'\n{seedname}\t{len(cami.seed_lst)}\t{tool}\t{len(cami.result_gene_sets[tool])}')
#make_consensus(vis=True)
random.seed(50)
removal_frac = 0.2
nof_iterations = int(seed_variation)
......@@ -215,10 +219,11 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
res_table1.write(f'{ident}')
# update uid
new_identifier = identifier + f'_{ident}'
# reset cami
cami.reset_cami(new_uid=new_identifier)
# cami.ppi_graph = original_ppi
#remove seeds
#remove seeds (again)
print(f'Removing {nof_removals} seeds from the original seed list...')
removed_seeds_idx = random.sample(list(range(nof_seeds)), nof_removals)
removed_seeds = cami.remove_seeds(removed_seeds_idx)
......@@ -273,13 +278,19 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
rediscovery_rates_results = [results[0] for results in variation_results]
# print(rediscovery_rates_results)
tools = [tool for tool in rediscovery_rates_results[0].keys()]
for idx,tool in enumerate(tools):
if '_' in tool:
tmp_lst = tool.split('_')
linebreak_pos = len(tmp_lst)//2
added_linebreak_lst = tmp_lst[:linebreak_pos] + ['\n'] + tmp_lst[linebreak_pos:]
tools[idx] = ''.join(added_linebreak_lst)
redisc_rates = [[res[tool] for res in rediscovery_rates_results] for tool in tools]
#PLOT
# Create a figure instance
plt.figure(figsize=(16,6))
plt.figure(figsize=(50,8))
# Extract Figure and Axes instance
ax1 = plt.subplot(1,2,1, label='ax1')
......@@ -331,55 +342,6 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
plt.savefig(f'{output_dir}/00_{identifier}_seed_variation_result.png')
print(f'Violin plot saved under: 00_{identifier}_seed_variation_result.png')
# plot TP Rate
# Extract Figure and Axes instance
fig2, ax2 = plt.subplots()
colors = ['red', 'blue', 'black', 'purple']
legend = []
# Create a plot
for idx,tool in enumerate(used_tools):
scatter = ax2.scatter(list(range(1,nof_iterations + 1)),tp_rate_dict[tool], color=colors[idx])
legend.append(scatter)
plt.legend(legend,
used_tools)
# Add title
ax2.set_title(f'Sensitivity (TP/(TP + FN)) in {nof_iterations} iterations.', wrap=True)
ax2.set_xticks(list(range(1,nof_iterations + 1)))
ax2.set_xticklabels([idx if idx%5==0 else '' for idx in range(1,nof_iterations+1)])
ax2.set_xlabel('Iterations')
ax2.set_ylabel('Sensitivity (TP/(TP + FN))')
# Save the figure
sensitivity_file = f'{output_dir}/00_{identifier}_seed_variation_tp_rates.png'
fig2.savefig(sensitivity_file)
print(f'Sensitivity plot saved under {sensitivity_file}')
# plot module size frac
fig3, ax3 = plt.subplots()
legend = []
for idx,tool in enumerate(used_tools):
scatter = ax3.scatter(list(range(1,nof_iterations + 1)), module_size_dict[tool], color=colors[idx])
legend.append(scatter)
plt.legend(legend,
used_tools)
# Add title
ax3.set_title(f'Ratio of number of rediscovered seeds and CAMI module size', wrap=True)
ax3.set_xticks((list(range(1,nof_iterations + 1))))
ax3.set_xticklabels([idx if idx%5==0 else '' for idx in range(1,nof_iterations+1)])
ax3.set_xlabel('Iterations')
ax3.set_ylabel('Module size ratio (<rediscovered seeds>/<module size>)')
# Save the figure
size_file = f'{output_dir}/00_{identifier}_redisc_modulesize_rate.png'
fig3.savefig(size_file)
print(f'Sensitivity plot saved under {size_file}')
if save_temps:
print(f'All temporary files were kept in {tmp_dir}')
......
......@@ -107,6 +107,7 @@ class cami():
:param home_path: Path to the cami home directory (gitlab repository)
:type home_path: str
"""
self.debug = False
self.ppi_graph = ppi_graph
self.origin_ppi_graph = ppi_graph.copy()
self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"]
......@@ -123,10 +124,11 @@ class cami():
self.tmp_dir = tmp_dir
self.nof_tools = len(tool_wrappers)
self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices)
self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices) WITHOUT seeds
self.result_module_sets = {} #contains the genes predicted by the tools (not the indices) WITH seeds
self.cami_module = [] # TODO: pick place where cami_module is set, which consensus approach should we use?
self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers}
self.code2toolname[0] = 'CAMI'
self.cami_vertices = []
self.code2toolname[0] = 'No tool'
self.ncbi = False
config = ConfigParser()
......@@ -149,8 +151,11 @@ class cami():
self.tmp_dir = new_tmp_dir
self.ppi_graph = self.origin_ppi_graph.copy()
self.result_gene_sets = {}
self.result_module_sets = {}
self.cami_vertices = []
self.seed_lst = self.origin_seed_lst.copy()
self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers}
self.code2toolname[0] = 'No tool'
def set_initial_seed_lst(self, seedlst):
self.initial_seed_lst = seedlst
......@@ -184,11 +189,10 @@ class cami():
return preds
def make_evaluation(self):
print(self.result_gene_sets)
biodigest.setup.main(setup_type="api")
for result_set in self.result_gene_sets:
for result_set in self.result_module_sets:
validation_results = biodigest.single_validation.single_validation(
tar=set(self.result_gene_sets[result_set]),
tar=set(self.result_module_sets[result_set]),
tar_id='entrez',
mode='set-set',
distance='jaccard',
......@@ -198,7 +202,7 @@ class cami():
biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir)
biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results,
mode='set-set',
tar=set(self.result_gene_sets[result_set]),
tar=set(self.result_module_sets[result_set]),
tar_id='entrez',
out_dir=self.output_dir,
prefix=f'{result_set}_{self.uid}')
......@@ -285,6 +289,10 @@ class cami():
tool_name_map = self.code2toolname
gene_name_map = self.ppi_vertex2gene
# remove seeds from result sets
for tool in result_sets:
result_sets[tool] -= set(self.seed_lst)
camis = {
'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
......@@ -328,11 +336,16 @@ class cami():
}},
}
# create integer codes for cami_versions (needed for predicted_by vertex property)
for cami_method_name, cami_params in camis.items():
print("Running " + cami_method_name)
tool_code = max(list(tool_name_map.keys())) + 1
tool_name_map[tool_code] = cami_method_name
cami_vertices, putative_vertices, codes2tools = cami_params['function'](result_sets, ppi_graph, seed_list,
predicted_by, cami_scores,
tool_name_map,
tool_name_map, tool_code,
cami_params['params'])
# sort the resulting vertices according to their cami_score
......@@ -342,23 +355,33 @@ class cami():
# translate the resulting vertex() ids to the corresponding names in the ppi network
cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vlist]
if self.debug:
print(f'With the given seed genes: {seed_genes} \n' +
f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):')
for vertex in cami_vlist:
print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
# for visualization
else:
print(f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module')
# for visualization with nvenn
self.result_gene_sets[cami_method_name] = cami_genes
if cami_method_name == 'cami_v1':
# for drugstone
self.cami_vertices = cami_vlist
# transform all vertex indices to their corresponding gene names in a result set
for tool in result_sets:
self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]])
# add seeds to result sets for drugstone and digest
for tool in result_sets:
self.result_module_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]]).union(self.seed_lst)
assert(self.code2toolname == tool_name_map)
# save the results in outputfiles
self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, result_sets, cami_scores)
gene_name_map, codes2tools, cami_scores)
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, result_sets, cami_scores):
gene_name_map, codes2tools, cami_scores):
# save all predictions by all tools
print('Saving the results...')
with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
......@@ -401,9 +424,6 @@ class cami():
print(f'saved cami output in: {self.output_dir}/CAMI_output_{self.uid}.tsv')
print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/CAMI_nodes_{cami_method}_{self.uid}.txt')
# transform all vertex indices to their corresponding gene names in a result set
for tool in result_sets:
self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]])
# save predictions by the other tools
for tool in self.result_gene_sets:
......@@ -413,9 +433,6 @@ class cami():
outputfile.write(f'{gene}\n')
print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
# for drugstone
self.cami_vertices = cami_vlist
# return values
consensus = {}
consensus['module'] = whole_module
......@@ -456,7 +473,7 @@ class cami():
def use_drugstone(self):
symbol = self.ppi_graph.vertex_properties["symbol"]
cami_module = self.cami_vertices + self.seed_lst
cami_module = self.cami_module
cami_symbols = [symbol[vertex] for vertex in cami_module]
cami_symbols.append
cami_symbol_edges = []
......
import sys, os
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
consens_threshold = params['consens_threshold']
# calculate gene weights
# set of all result genes
......@@ -10,8 +10,8 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
# parse every result set of each tool
for tool in result_sets:
print(f'{tool.name}: {tool.weight}')
result_sets[tool] -= set(seed_lst)
#print(f'{tool.name}: {tool.weight}')
# TODO: Should we keep the seeds in the result sets?
# every time a tool predicted a gene add 1 * the tool weight to its weight and add it to the result genes
for vertex in result_sets[tool]:
......@@ -22,7 +22,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
vertex] >= consens_threshold: # if a vertex was predicted twice (or once if there is only 1 tool used) add it to the cami set
putative_vertices.remove(vertex)
cami_vertices.add(vertex)
predicted_by[vertex][0] = 1
predicted_by[vertex][tool_code] = 1
# TODO: Find alternate ways to calculate CAMI scores => The heavy weights should get +0.5 too?
# TODO: Try to rerun cami with varying input seeds?
......@@ -36,7 +36,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
for vertex in putative_vertices:
if vertex in neighbors: # if a vertex is in the neighborhood of the heavy vertices increase the cami_score
cami_vertices.add(vertex)
predicted_by[vertex][0] = 1
predicted_by[vertex][tool_code] = 1
cami_scores[vertex] += 0.5
......@@ -45,4 +45,4 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
vertex in ppi_graph.vertices()}
return cami_vertices.union(set(seed_lst)), putative_vertices, codes2tools
\ No newline at end of file
return cami_vertices, putative_vertices, codes2tools
\ No newline at end of file
......@@ -7,7 +7,7 @@ from utils.networks import trustrank, betweenness, must
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
damping_factor = params['damping_factor']
hub_penalty = params['hub_penalty']
confidence_level = params.get('confidence_level',0.5)
......@@ -23,7 +23,6 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
# parse every result set of each tool
counts = defaultdict(lambda: 0)
for tool in result_sets:
result_sets[tool] -= set(seed_lst)
for vertex in result_sets[tool]:
putative_vertices.add(vertex)
counts[vertex] = counts[vertex] + tool.weight
......@@ -49,8 +48,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
for v in putative_vertices:
if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0:
cami_vertices.add(v)
predicted_by[v][tool_code] = 1
# translate tool code to string
codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
vertex in ppi_graph.vertices()}
return cami_vertices.union(set(seed_lst)), putative_vertices, codes2tools
return cami_vertices, putative_vertices, codes2tools
......@@ -5,10 +5,10 @@ import graph_tool as gt
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
damping_factor = params['damping_factor']
hub_penalty = params['hub_penalty']
confidence_levelentage = params['confidence_level']
confidence_level = params['confidence_level']
weighted = 'weighted' in params and params['weighted']
ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
trees = params.get('trees',5)
......@@ -22,7 +22,6 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
# parse every result set of each tool
counts = defaultdict(lambda: 0)
for tool in result_sets:
result_sets[tool] -= set(seed_lst)
for vertex in result_sets[tool]:
putative_vertices.add(vertex)
counts[vertex] = counts[vertex] + tool.weight
......@@ -55,12 +54,13 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
pass
putative_scores = list(putative_score_map.values())
putative_scores.sort()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_levelentage))]
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices:
if putative_score_map[v] >= threshold and putative_score_map[v] > 0:
cami_vertices.add(v)
predicted_by[v][tool_code] = 1
# translate tool code to string
codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
vertex in ppi_graph.vertices()}
return cami_vertices.union(set(seed_lst)), putative_vertices, codes2tools
return cami_vertices, putative_vertices, codes2tools
......@@ -33,9 +33,8 @@ def csv2graph(inputfile,
unseen_vertices -= 1
if unseen_vertices == 0:
break
g.vertex_properties["betweenness"], g.edge_properties["betweenness"] = graph_tool.centrality.betweenness(g)
g.vertex_properties["cami_score"] = g.new_vertex_property("float", val=0.0)
values = (20) * [-1]
values = (50) * [-1]
g.vertex_properties["predicted_by"] = g.new_vertex_property("vector<int16_t>", val=values)
return g
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment