Skip to content
Snippets Groups Projects
Commit 36aa2efc authored by bay9355's avatar bay9355
Browse files
parents 8e1bf655 e105b38d
No related branches found
No related tags found
No related merge requests found
......@@ -35,6 +35,7 @@ def list_combinations(lst, k):
assert len(set(combs)) == nof_combs
return (combs)
def initialize_cami(path_to_ppi_file=''):
cami_params = {}
# find homepath aka ~/cami
......@@ -85,12 +86,13 @@ def initialize_cami(path_to_ppi_file=''):
seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths}
class cami():
""" A module that is used for Active Module identification based on a
consensus approach
"""
def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', seed_score=10, parallelization=False):
def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf',
seed_score=10, parallelization=False):
"""Instance variables of CAMI
:param ppi_graph: The PPI-Graph on which all predictions in CAMI are based
......@@ -201,13 +203,15 @@ class cami():
ref=set(self.seed_lst),
ref_id='entrez')
if validation_results['status'] == 'ok':
biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir)
biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}',
self.output_dir)
biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results,
mode='set-set',
tar=set(self.result_module_sets[result_set]),
tar_id='entrez',
out_dir=self.output_dir,
prefix=f'{result_set}_{self.uid}')
def run_threaded_tool(self, tool, pred_sets):
"""run a tool in one thread and save the results into a dictionary pred_sets
......@@ -296,46 +300,45 @@ class cami():
for tool in result_sets:
result_sets[tool] -= set(self.seed_lst)
params = {'hub_pentalty': [0, 0.25, 0.5, 0.75, 1.0], 'damping_factor': [0.1, 0.25, 0.5, 0.75], 'confidence_level': [0.2, 0.35, 0.5, 0.75], 'ranking':["trustrank", "betweenness", "harmonic"], 'function':[cami_v2.run_cami, cami_v3.run_cami]}
camis = {
'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5
}},
'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
'cami_v2_param1_b': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5
}},
'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
'cami_v2_param1_hc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.3,'ranking': 'harmonic', 'confidence_level': 0.5
}},
'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
}},
'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'ranking': 'must',
'cami_v2_param2_b': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5
}},
'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
'cami_v2_param2_hc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5
}},
'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5
}},
'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
'cami_v3_param1_b': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5
}},
'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
'cami_v3_param1_hc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.3, 'ranking': 'harmonic', 'confidence_level': 0.5
}},
'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'trustrank'
}},
'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
}},
'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must'
'cami_v3_param2_b': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5
}},
'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15
'cami_v3_param2_hc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5
}},
}
......@@ -369,7 +372,8 @@ class cami():
for vertex in cami_vlist:
print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
else:
print(f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module')
print(
f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module')
# for visualization with nvenn
self.result_gene_sets[cami_method_name] = set(cami_genes)
......@@ -381,8 +385,10 @@ class cami():
# add seeds to result sets for drugstone and digest
for toolname in self.result_gene_sets:
self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(set([gene_name_map[svertex] for svertex in self.seed_lst]))
print(f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(
set([gene_name_map[svertex] for svertex in self.seed_lst]))
print(
f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, cami_scores):
......@@ -390,13 +396,16 @@ class cami():
if self.debug:
print('Saving the results...')
with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+
outputfile.write(
f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n' +
f'initially: {self.initial_seed_lst}\n')
outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n')
all_vertices = cami_vertices.union(putative_vertices)
for vertex in all_vertices:
outputfile.write(f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
if self.debug: print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
outputfile.write(
f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
if self.debug: print(
f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
# save the predictions made by cami
ncbi_url = ('\tncbi_url' if self.ncbi else '')
......@@ -414,7 +423,8 @@ class cami():
summary = ''
else:
url, summary = '', ''
outputfile.write(f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
outputfile.write(
f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
# # save the whole module
# whole_module = []
......@@ -429,7 +439,6 @@ class cami():
# print(f'saved {cami_method} output in: {cami_method}_output_{self.uid}.tsv')
# print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/{cami_method}_module_{self.uid}.txt')
# save predicted modules by all other tools
for tool in self.result_module_sets:
with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile:
......@@ -439,7 +448,6 @@ class cami():
if self.debug:
print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
def use_nvenn(self):
"""Create Venn Diagrams via an external tool named degradome.
Sends a request via requests to the degradome server.
......@@ -503,4 +511,3 @@ class cami():
for seed in removed_seeds:
self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0
return removed_seeds
import sys
from collections import defaultdict
import graph_tool as gt
from utils.networks import trustrank, betweenness, must
from utils.networks import trustrank, betweenness, must, closeness
......@@ -38,12 +38,15 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
if ranking_method == 'trustrank':
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
elif ranking_method == 'betweenness':
scores = betweenness(subnet, seed_lst, hub_penalty, weights)
scores = betweenness(subnet, hub_penalty, weights)
elif ranking_method == 'must':
scores = must(subnet, seed_lst, 5, hub_penalty, weights, trees)
elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights)
putative_scores = scores.a[[int(id) for id in putative_vertices]]
putative_scores.sort()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices:
if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0:
......
import sys
from collections import defaultdict
from utils.networks import trustrank, betweenness, must
from utils.networks import trustrank, betweenness, must, closeness
import graph_tool as gt
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
damping_factor = params['damping_factor']
hub_penalty = params['hub_penalty']
confidence_level = params['confidence_level']
confidence_level = params.get('confidence_level', 0.5)
weighted = 'weighted' in params and params['weighted']
ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
trees = params.get('trees', 5)
......@@ -40,9 +41,11 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
if ranking_method == 'trustrank':
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
elif ranking_method == 'betweenness':
scores = betweenness(subnet, seed_lst, hub_penalty, weights)
scores = betweenness(subnet, hub_penalty, weights)
elif ranking_method == 'must':
scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance)
elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights)
tool_scores[tool] = scores
putative_score_map = defaultdict(lambda: 0)
......
......@@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0])
networkfile = "../data/input/networks/example_network.tsv"
seedfile = "../data/input/seeds/example_seeds.txt"
identifier = "example_run"
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -d -p --f;'
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -p --f;'
subprocess.call(command, shell=True)
\ No newline at end of file
......@@ -9,6 +9,7 @@ import graph_tool.topology as gtt
import graph_tool.util as gtu
import itertools as it
def edge_weights(g, base_weigths, hub_penalty, inverse=False):
avdeg = gts.vertex_average(g, "total")[0]
weights = g.new_edge_property("double", val=avdeg)
......@@ -124,6 +125,7 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
return g2
def find_bridges(g):
r"""Finds all bridges in a graph."""
global __time
......@@ -139,6 +141,7 @@ def find_bridges(g):
__dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge)
return is_bridge
def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge):
visited[node] = True
global __time
......@@ -246,19 +249,27 @@ def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
return scores
def betweenness(g, seed_ids, hub_penalty, weights=None):
def betweenness(g, hub_penalty, weights=None):
if gt.openmp_enabled():
gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True)
scores = g.new_vertex_property("float")
all_pairs = [(source, target) for source in seed_ids for target in seed_ids if source < target]
for source, target in all_pairs:
local_scores = g.new_vertex_property("float")
num_paths = 0.0
for path in gtt.all_shortest_paths(g, source, target, weights=weights):
local_scores.a[path[1:-1]] += 1
num_paths += 1
if num_paths > 0:
local_scores.a /= num_paths
scores.a += local_scores.a
# Call graph-tool to compute betweenness centrality.
# trust = g.new_vertex_property("double")
scores, _ = gtc.betweenness(g, weight=weights)
# Compute and return the results.
return scores
def closeness(g, hub_penalty, weights=None):
    """Rank the vertices of ``g`` by harmonic closeness centrality.

    :param g: the graph whose vertices are scored
    :param hub_penalty: hub penalty forwarded unchanged to ``edge_weights``
    :param weights: optional base edge weights forwarded to ``edge_weights``
                    (``None`` is passed through as-is)
    :return: the result of graph-tool's ``closeness`` call with
             ``harmonic=True`` (callers index it via ``.a``, so presumably a
             vertex property map -- confirm against graph-tool docs)
    """
    # Limit graph-tool to six OpenMP threads when OpenMP support is available.
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)

    # Derive the edge weights with the ``inverse=True`` variant of the helper.
    penalized_weights = edge_weights(g, weights, hub_penalty, inverse=True)

    # Let graph-tool compute the harmonic closeness on the weighted graph.
    return gtc.closeness(g, weight=penalized_weights, harmonic=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment