Skip to content
Snippets Groups Projects
Commit 36aa2efc authored by bay9355's avatar bay9355
Browse files
parents 8e1bf655 e105b38d
No related branches found
No related tags found
No related merge requests found
...@@ -35,6 +35,7 @@ def list_combinations(lst, k): ...@@ -35,6 +35,7 @@ def list_combinations(lst, k):
assert len(set(combs)) == nof_combs assert len(set(combs)) == nof_combs
return (combs) return (combs)
def initialize_cami(path_to_ppi_file=''): def initialize_cami(path_to_ppi_file=''):
cami_params = {} cami_params = {}
# find homepath aka ~/cami # find homepath aka ~/cami
...@@ -85,12 +86,13 @@ def initialize_cami(path_to_ppi_file=''): ...@@ -85,12 +86,13 @@ def initialize_cami(path_to_ppi_file=''):
seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths} seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths}
class cami(): class cami():
""" A module that is used for Active Module identifaction based on a """ A module that is used for Active Module identifaction based on a
consensus approach consensus approach
""" """
def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', seed_score=10, parallelization=False):
def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf',
seed_score=10, parallelization=False):
"""Instance variables of CAMI """Instance variables of CAMI
:param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
...@@ -201,13 +203,15 @@ class cami(): ...@@ -201,13 +203,15 @@ class cami():
ref=set(self.seed_lst), ref=set(self.seed_lst),
ref_id='entrez') ref_id='entrez')
if validation_results['status'] == 'ok': if validation_results['status'] == 'ok':
biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir) biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}',
self.output_dir)
biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results, biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results,
mode='set-set', mode='set-set',
tar=set(self.result_module_sets[result_set]), tar=set(self.result_module_sets[result_set]),
tar_id='entrez', tar_id='entrez',
out_dir=self.output_dir, out_dir=self.output_dir,
prefix=f'{result_set}_{self.uid}') prefix=f'{result_set}_{self.uid}')
def run_threaded_tool(self, tool, pred_sets): def run_threaded_tool(self, tool, pred_sets):
"""run a tool in one thread and save the results into a dictionary pred_sets """run a tool in one thread and save the results into a dictionary pred_sets
...@@ -296,46 +300,45 @@ class cami(): ...@@ -296,46 +300,45 @@ class cami():
for tool in result_sets: for tool in result_sets:
result_sets[tool] -= set(self.seed_lst) result_sets[tool] -= set(self.seed_lst)
params = {'hub_pentalty': [0, 0.25, 0.5, 0.75, 1.0], 'damping_factor': [0.1, 0.25, 0.5, 0.75], 'confidence_level': [0.2, 0.35, 0.5, 0.75], 'ranking':["trustrank", "betweenness", "harmonic"], 'function':[cami_v2.run_cami, cami_v3.run_cami]}
camis = { camis = {
'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}}, 'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': { 'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75 'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5
}}, }},
'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': { 'cami_v2_param1_b': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness' 'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5
}}, }},
'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': { 'cami_v2_param1_hc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must' 'hub_penalty': 0.3,'ranking': 'harmonic', 'confidence_level': 0.5
}}, }},
'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': { 'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
}}, }},
'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': { 'cami_v2_param2_b': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'ranking': 'must', 'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5
}}, }},
'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': { 'cami_v2_param2_hc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness' 'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5
}}, }},
'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': { 'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75 'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5
}}, }},
'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': { 'cami_v3_param1_b': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness' 'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5
}}, }},
'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': { 'cami_v3_param1_hc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must' 'hub_penalty': 0.3, 'ranking': 'harmonic', 'confidence_level': 0.5
}}, }},
'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': { 'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'trustrank' 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
}},
'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
}}, }},
'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': { 'cami_v3_param2_b': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must' 'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5
}}, }},
'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': { 'cami_v3_param2_hc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15 'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5
}}, }},
} }
...@@ -369,7 +372,8 @@ class cami(): ...@@ -369,7 +372,8 @@ class cami():
for vertex in cami_vlist: for vertex in cami_vlist:
print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}') print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
else: else:
print(f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module') print(
f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module')
# for visualization with nvenn # for visualization with nvenn
self.result_gene_sets[cami_method_name] = set(cami_genes) self.result_gene_sets[cami_method_name] = set(cami_genes)
...@@ -381,8 +385,10 @@ class cami(): ...@@ -381,8 +385,10 @@ class cami():
# add seeds to result sets for drugstone and digest # add seeds to result sets for drugstone and digest
for toolname in self.result_gene_sets: for toolname in self.result_gene_sets:
self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(set([gene_name_map[svertex] for svertex in self.seed_lst])) self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(
print(f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes') set([gene_name_map[svertex] for svertex in self.seed_lst]))
print(
f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, cami_scores): gene_name_map, codes2tools, cami_scores):
...@@ -390,13 +396,16 @@ class cami(): ...@@ -390,13 +396,16 @@ class cami():
if self.debug: if self.debug:
print('Saving the results...') print('Saving the results...')
with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile: with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+ outputfile.write(
f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n' +
f'initially: {self.initial_seed_lst}\n') f'initially: {self.initial_seed_lst}\n')
outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n') outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n')
all_vertices = cami_vertices.union(putative_vertices) all_vertices = cami_vertices.union(putative_vertices)
for vertex in all_vertices: for vertex in all_vertices:
outputfile.write(f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n') outputfile.write(
if self.debug: print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv') f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
if self.debug: print(
f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
# save the predictions made by cami # save the predictions made by cami
ncbi_url = ('\tncbi_url' if self.ncbi else '') ncbi_url = ('\tncbi_url' if self.ncbi else '')
...@@ -414,7 +423,8 @@ class cami(): ...@@ -414,7 +423,8 @@ class cami():
summary = '' summary = ''
else: else:
url, summary = '', '' url, summary = '', ''
outputfile.write(f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n') outputfile.write(
f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
# # save the whole module # # save the whole module
# whole_module = [] # whole_module = []
...@@ -429,7 +439,6 @@ class cami(): ...@@ -429,7 +439,6 @@ class cami():
# print(f'saved {cami_method} output in: {cami_method}_output_{self.uid}.tsv') # print(f'saved {cami_method} output in: {cami_method}_output_{self.uid}.tsv')
# print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/{cami_method}_module_{self.uid}.txt') # print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/{cami_method}_module_{self.uid}.txt')
# save predicted modules by all other tools # save predicted modules by all other tools
for tool in self.result_module_sets: for tool in self.result_module_sets:
with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile: with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile:
...@@ -439,7 +448,6 @@ class cami(): ...@@ -439,7 +448,6 @@ class cami():
if self.debug: if self.debug:
print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv') print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
def use_nvenn(self): def use_nvenn(self):
"""Create Venn Diagrams via a external tool named degradome. """Create Venn Diagrams via a external tool named degradome.
Sends a request via requests to the degradome server. Sends a request via requests to the degradome server.
...@@ -503,4 +511,3 @@ class cami(): ...@@ -503,4 +511,3 @@ class cami():
for seed in removed_seeds: for seed in removed_seeds:
self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0 self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0
return removed_seeds return removed_seeds
import sys import sys
from collections import defaultdict from collections import defaultdict
import graph_tool as gt import graph_tool as gt
from utils.networks import trustrank, betweenness, must from utils.networks import trustrank, betweenness, must, closeness
...@@ -38,12 +38,15 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -38,12 +38,15 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
if ranking_method == 'trustrank': if ranking_method == 'trustrank':
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights) scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
elif ranking_method == 'betweenness': elif ranking_method == 'betweenness':
scores = betweenness(subnet, seed_lst, hub_penalty, weights) scores = betweenness(subnet, hub_penalty, weights)
elif ranking_method == 'must': elif ranking_method == 'must':
scores = must(subnet, seed_lst, 5, hub_penalty, weights, trees) scores = must(subnet, seed_lst, 5, hub_penalty, weights, trees)
elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights)
putative_scores = scores.a[[int(id) for id in putative_vertices]] putative_scores = scores.a[[int(id) for id in putative_vertices]]
putative_scores.sort() putative_scores.sort()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices: for v in putative_vertices:
if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0: if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0:
......
import sys import sys
from collections import defaultdict from collections import defaultdict
from utils.networks import trustrank, betweenness, must from utils.networks import trustrank, betweenness, must, closeness
import graph_tool as gt import graph_tool as gt
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically? # TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
damping_factor = params['damping_factor'] damping_factor = params['damping_factor']
hub_penalty = params['hub_penalty'] hub_penalty = params['hub_penalty']
confidence_level = params['confidence_level'] confidence_level = params.get('confidence_level', 0.5)
weighted = 'weighted' in params and params['weighted'] weighted = 'weighted' in params and params['weighted']
ranking_method = params['ranking'] if 'ranking' in params else 'trustrank' ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
trees = params.get('trees', 5) trees = params.get('trees', 5)
...@@ -40,9 +41,11 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -40,9 +41,11 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
if ranking_method == 'trustrank': if ranking_method == 'trustrank':
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights) scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
elif ranking_method == 'betweenness': elif ranking_method == 'betweenness':
scores = betweenness(subnet, seed_lst, hub_penalty, weights) scores = betweenness(subnet, hub_penalty, weights)
elif ranking_method == 'must': elif ranking_method == 'must':
scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance) scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance)
elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights)
tool_scores[tool] = scores tool_scores[tool] = scores
putative_score_map = defaultdict(lambda: 0) putative_score_map = defaultdict(lambda: 0)
......
...@@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0]) ...@@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0])
networkfile = "../data/input/networks/example_network.tsv" networkfile = "../data/input/networks/example_network.tsv"
seedfile = "../data/input/seeds/example_seeds.txt" seedfile = "../data/input/seeds/example_seeds.txt"
identifier = "example_run" identifier = "example_run"
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -d -p --f;' command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -p --f;'
subprocess.call(command, shell=True) subprocess.call(command, shell=True)
\ No newline at end of file
...@@ -9,6 +9,7 @@ import graph_tool.topology as gtt ...@@ -9,6 +9,7 @@ import graph_tool.topology as gtt
import graph_tool.util as gtu import graph_tool.util as gtu
import itertools as it import itertools as it
def edge_weights(g, base_weigths, hub_penalty, inverse=False): def edge_weights(g, base_weigths, hub_penalty, inverse=False):
avdeg = gts.vertex_average(g, "total")[0] avdeg = gts.vertex_average(g, "total")[0]
weights = g.new_edge_property("double", val=avdeg) weights = g.new_edge_property("double", val=avdeg)
...@@ -124,6 +125,7 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): ...@@ -124,6 +125,7 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
return g2 return g2
def find_bridges(g): def find_bridges(g):
r"""Finds all bridges in a graph.""" r"""Finds all bridges in a graph."""
global __time global __time
...@@ -139,6 +141,7 @@ def find_bridges(g): ...@@ -139,6 +141,7 @@ def find_bridges(g):
__dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge) __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge)
return is_bridge return is_bridge
def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge):
visited[node] = True visited[node] = True
global __time global __time
...@@ -246,19 +249,27 @@ def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None): ...@@ -246,19 +249,27 @@ def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
return scores return scores
def betweenness(g, seed_ids, hub_penalty, weights=None): def betweenness(g, hub_penalty, weights=None):
if gt.openmp_enabled(): if gt.openmp_enabled():
gt.openmp_set_num_threads(6) gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True) weights = edge_weights(g, weights, hub_penalty, inverse=True)
scores = g.new_vertex_property("float") # Call graph-tool to compute TrustRank.
all_pairs = [(source, target) for source in seed_ids for target in seed_ids if source < target] # trust = g.new_vertex_property("double")
for source, target in all_pairs: scores, _ = gtc.betweenness(g, weight=weights)
local_scores = g.new_vertex_property("float") # Compute and return the results.
num_paths = 0.0
for path in gtt.all_shortest_paths(g, source, target, weights=weights):
local_scores.a[path[1:-1]] += 1
num_paths += 1
if num_paths > 0:
local_scores.a /= num_paths
scores.a += local_scores.a
return scores return scores
def closeness(g, hub_penalty, weights=None):
    """Score the vertices of ``g`` by harmonic closeness centrality.

    The edge weights used for the centrality are derived by ``edge_weights``
    (called with ``inverse=True``) from the optional base ``weights`` and the
    given ``hub_penalty``.

    :param g: graph whose vertices are scored
    :param hub_penalty: hub penalty forwarded to ``edge_weights``
    :param weights: optional base edge weights forwarded to ``edge_weights``
    :return: the value returned by ``gtc.closeness`` for ``g``
        (presumably a vertex property map; callers read it via ``.a`` -- confirm)
    """
    # Side effect: when OpenMP is available, graph-tool's thread count is set to 6.
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    weights = edge_weights(g, weights, hub_penalty, inverse=True)
    # harmonic=True requests the harmonic variant of closeness.
    scores = gtc.closeness(g, weight=weights, harmonic=True)
    return scores
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment