Skip to content
Snippets Groups Projects
Commit a7feae0a authored by AndiMajore's avatar AndiMajore
Browse files

fixed direction of confidence_level

parent 0932bd52
Branches
No related tags found
No related merge requests found
......@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
if consensus:
cami.reset_cami()
if evaluate or (not consensus and not evaluate and not seed_variation):
if evaluate and (consensus or seed_variation):
cami.make_evaluation()
# SEED VARIATION
......@@ -122,7 +122,7 @@ if __name__ == "__main__":
help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1")
parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami")
parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.")
parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST")
parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST", default=False)
parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None)
parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID")
parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files")
......
......@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt
import itertools
def generate_param_combinations(params_dict):
"""
Generates all possible combinations of parameters for the given function(s) and returns them as a list.
......@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict):
result.append([function_name, params_str, {'params': param_dict, 'function': function}])
return result
def initialize_cami(path_to_ppi_file=''):
cami_params = {}
# find homepath aka ~/cami
......@@ -91,8 +91,9 @@ class cami():
consensus approach
"""
def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='', configuration='camiconf',
parallelization=False, ncbi=False, debug=False,save_temps=False, toolweights=None):
def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='',
configuration='camiconf',
parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None):
"""Instance variables of CAMI
:param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
......@@ -122,7 +123,7 @@ class cami():
self.prediction_tools = [wrapper.name for wrapper in tool_wrappers]
self.toolweights = toolweights
self.home_path = home_path
if uid==None:
if uid == None:
uid = str(uuid.uuid4())
self.uid = str(uid)
if output_dir == None:
......@@ -132,7 +133,7 @@ class cami():
if self.debug:
print(f"Output directory of cami: {output_dir}")
self.output_dir = output_dir
tmp_dir = os.path.join(home_path, 'data', 'tmp', self.uid)
if not os.path.exists(tmp_dir):
os.makedirs(tmp_dir)
......@@ -190,7 +191,7 @@ class cami():
tool.set_weight(self.toolweights[tool.code - 1])
else:
tool.set_weight()
def initialize_all_tools(self):
    """Run initialize_tool() on every registered tool wrapper."""
    for wrapper in self.tool_wrappers:
        self.initialize_tool(wrapper)
......@@ -217,7 +218,7 @@ class cami():
seed_gene_lst = [self.ppi_vertex2gene[seed] for seed in self.seed_lst]
ppi_graph_file = os.path.join(self.tmp_dir, f'ppi_graph_{self.uid}.graphml')
self.ppi_graph.save(ppi_graph_file)
biodigest.setup.main(setup_type="api")
for result_set in self.result_module_sets:
set_validation_results = biodigest.single_validation.single_validation(
......@@ -227,7 +228,7 @@ class cami():
distance='jaccard',
ref=set(seed_gene_lst),
ref_id='entrez')
if set_validation_results['status'] == 'ok':
biodigest.single_validation.save_results(set_validation_results, f'{result_set}_{self.uid}',
self.output_dir)
......@@ -238,14 +239,14 @@ class cami():
out_dir=self.output_dir,
prefix=f'{result_set}_{self.uid}',
file_type='png')
with open(os.path.join(self.tmp_dir, f'{result_set}_{self.uid}_relevance_scores.tsv'), 'w') as f:
rel_score_name = list(set_validation_results['input_values']['values'].keys())[0]
f.write(f'value\t{rel_score_name}\n')
val_res_dct = set_validation_results['input_values']['values'][rel_score_name]
for val in val_res_dct:
f.write(f'{val}\t{val_res_dct[val]}\n')
# sub_validation_results = biodigest.single_validation.single_validation(
# tar=set(self.result_module_sets[result_set]),
# tar_id='entrez',
......@@ -267,7 +268,6 @@ class cami():
# out_dir=self.output_dir,
# prefix=f'{result_set}_{self.uid}',
# file_type='png')
def run_threaded_tool(self, tool, pred_sets):
"""run a tool in one thread and save the results into a dictionary pred_sets
......@@ -347,7 +347,7 @@ class cami():
# set of all result genes
cami_scores = self.ppi_graph.vertex_properties["cami_score"]
predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
consens_threshold = min(self.nof_tools, 2)
# consens_threshold = min(self.nof_tools, 2)
ppi_graph = self.ppi_graph
seed_list = self.seed_lst
tool_name_map = self.code2toolname
......@@ -356,22 +356,22 @@ class cami():
# remove seeds from result sets
for tool in result_sets:
result_sets[tool] -= set(self.seed_lst)
params_tr = {'hub_penalty': [0.25],
'damping_factor': [0.7],
'confidence_level': [0.5],
'damping_factor': [0.75],
'confidence_level': [0.8],
'ranking': ['trustrank'],
'function': {'cami_v3':cami_v3.run_cami}}
'function': {'cami_v3': cami_v3.run_cami}}
cami_setting_list = generate_param_combinations(params_tr)
camis = {}
for setting in cami_setting_list:
if setting[1]:
func_name = setting[0] + '_' +setting[1].rsplit('_', 1)[-1]
func_name = setting[0] + '_' + setting[1].rsplit('_', 1)[-1]
else:
func_name = setting[0]
camis[func_name] = setting[2]
# transform all vertex indices to their corresponding gene names in a result set
......@@ -413,7 +413,8 @@ class cami():
sys.setrecursionlimit(recursion_limit)
# save the results in outputfiles
if save_output:
self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices,
cami_genes,
gene_name_map, codes2tools, cami_scores)
# add seeds to result sets for drugstone and digest
......@@ -423,7 +424,6 @@ class cami():
print(
f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, cami_scores):
# save all predictions by all tools
......@@ -468,8 +468,8 @@ class cami():
outputfile.write(f'{gene}\n')
if self.debug:
print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
def visualize_and_save_comparison_matrix(self, additional_id='',
def visualize_and_save_comparison_matrix(self, additional_id='',
title='Intersections of result_gene_sets of all analyzed algorithms.'):
"""Create a comparison matrix of the results of all tools. And save it as png file.
"""
......@@ -489,7 +489,7 @@ class cami():
plt.close(comp_fig)
plt.close(norm_fig)
return comp_fig_file, norm_fig_file
def use_nvenn(self, download=False):
"""Create Venn Diagrams via a external tool named nvenn by degradome.
Sends a request via requests to the degradome server.
......@@ -510,7 +510,7 @@ class cami():
else:
print('Cannot use degradome to create venn diagrams of 6 or more tools')
return None
def download_diagram(self, url):
venn_name = f'{self.output_dir}/vdiagram_{self.uid}'
response = degradome.download_image(url, venn_name + '.png')
......
......@@ -4,12 +4,11 @@ import graph_tool as gt
from utils.networks import trustrank, betweenness, must, closeness
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
hub_penalty = params['hub_penalty']
confidence_level = params.get('confidence_level',0.5)
confidence_level = params.get('confidence_level', 0.5)
weighted = 'weighted' in params and params['weighted']
ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
trees = params.get('trees', 5)
......@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
weights = subnet.new_edge_property("double")
for v, c in counts.items():
weights.a[int(v)] = c
# sum = 0
# TODO idea for more sophisticated weighting: but then as prior for trustrank and not as weights
# for v in subnet.vertices():
# c = counts[v]
# min_dist = None
# for seed in seed_lst:
# dist = gt.topology.shortest_distance(ppi_graph, seed, v)
# if dist == 2147483647:
# dist = 10
# if min_dist is None:
# min_dist = dist
# else:
# min_dist = min(min_dist, dist)
# min_dist /= len(seed_lst)
# score = c + (10-min_dist)
# sum += score
# weights.a[int(v)] = c
# for v in subnet.vertices():
# weights.a[int(v)] /= sum
if ranking_method == 'trustrank':
damping_factor = params['damping_factor']
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
......@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights)
putative_scores = scores.a[[int(id) for id in putative_vertices]]
putative_scores = list(scores.a[[int(id) for id in putative_vertices]])
putative_scores.sort()
putative_scores.reverse()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices:
......
......@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
pass
putative_scores = list(putative_score_map.values())
putative_scores.sort()
putative_scores.reverse()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices:
if putative_score_map[v] >= threshold and putative_score_map[v] > 0:
......
......@@ -8,6 +8,7 @@ import subprocess
# Run CAMI on the example network with the multiple-sclerosis seed set.
# Work relative to the directory containing this script.
chdir((sys.argv[0].rsplit('/', 1))[0])

networkfile = "../data/input/networks/example_network.tsv"
# Seed file for this run; the earlier example_seeds.txt assignment was a
# stale leftover that was immediately overwritten, so it is removed here.
seedfile = "../data/input/seeds/multiple_sclerosis.tsv"
identifier = "example_run"

# NOTE(review): shell=True with an interpolated command string is a shell
# injection risk if the paths ever become user-supplied; prefer
# subprocess.run([...], shell=False) with an argument list.
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p -f -v;'
subprocess.call(command, shell=True)
\ No newline at end of file
......@@ -10,7 +10,7 @@ import graph_tool.util as gtu
import itertools as it
def edge_weights(g, base_weigths, hub_penalty, inverse=False):
def edge_weights(g, base_weigths, hub_penalty = 0, inverse=False):
avdeg = gts.vertex_average(g, "total")[0]
weights = g.new_edge_property("double", val=avdeg)
if base_weigths is not None:
......@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
return score_prop
def trustrank(g, seed_ids, damping_factor, hub_penalty=0, weights=None):
    """Rank vertices of *g* with TrustRank (personalized PageRank seeded on seed_ids).

    The scraped diff contained both the pre- and post-commit lines of this
    function interleaved (two ``def`` headers, two ``edge_weights`` calls),
    which is not valid Python; this body keeps the post-commit versions:
    ``hub_penalty`` gains a default of 0 and the edge weights are built with
    ``inverse=False``.

    :param g: graph_tool Graph to rank
    :param seed_ids: iterable of seed vertices that receive the initial trust mass
    :param damping_factor: damping factor passed to pagerank
    :param hub_penalty: hub penalty folded into the edge weights (default 0)
    :param weights: optional base edge-weight property; combined with the hub
        penalty by ``edge_weights``
    :return: vertex property map of TrustRank scores
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    # NOTE(review): inverse=False is the post-commit behavior — confirm against
    # utils.networks.edge_weights before relying on the weight direction.
    weights = edge_weights(g, weights, hub_penalty, inverse=False)
    # Distribute the initial trust uniformly over the seed vertices.
    trust = g.new_vertex_property("double")
    trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids)
    # Personalized PageRank with the seed trust vector.
    scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
    return scores
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment