# cami_suite.py -- CAMI: consensus-based Active Module identification.
# (Removed stray GitLab page artifacts -- "Select Git revision", "Code owners",
# approver note and file-size chrome -- that were accidentally captured above
# the module source and made the file syntactically invalid.)
import threading, biodigest, os
from utils import drugstone, degradome, ncbi
from algorithms.DiamondWrapper import DiamondWrapper
from algorithms.DominoWrapper import DominoWrapper
from algorithms.RobustWrapper import RobustWrapper
from configparser import ConfigParser
import preprocess
from consensus import cami_v1, cami_v2, cami_v3
def list_combinations(lst, k):
    """Create all 2**k ordered combinations of length k from a two-element list.

    The result is ordered like binary counting: the first column alternates
    slowest (blocks of 2**(k-1)), the last column alternates fastest --
    i.e. the same ordering as ``itertools.product(lst, repeat=k)``.

    :param lst: a list with length 2 (the two objects to combine)
    :type lst: list
    :param k: length of the combinations
    :type k: int
    :return: list of 2**k tuples, each of length k
    :rtype: list(tuple)
    """
    nof_combs = 2 ** k
    columns = []
    # 'block' is the run length of equal elements in the current column:
    # 2**(k-1) for the first column, halving down to 1 for the last.
    block = nof_combs // 2
    while block >= 1:
        column = []
        while len(column) < nof_combs:
            column.extend([lst[0]] * block)
            column.extend([lst[1]] * block)
        columns.append(column)
        block //= 2  # integer halving replaces the old float-divide-then-cast dance
    combs = [tuple(column[i] for column in columns) for i in range(nof_combs)]
    # sanity check: requires the two elements of lst to be distinct,
    # otherwise duplicates collapse in the set
    assert len(set(combs)) == nof_combs
    return combs
def initialize_cami(path_to_ppi_file=''):
    """Locate the cami repository, initialize the tool wrappers and preprocess
    the PPI network and the example seed files.

    Walks up from the current working directory (via os.chdir) until a folder
    named 'cami' is found and treats it as the home path.

    :param path_to_ppi_file: optional path to a PPI network file; if empty,
        the bundled example network is used
    :type path_to_ppi_file: str
    :return: dictionary with keys 'home_path', 'cami_src_path',
        'tool_wrappers', 'ppi_graph' and 'seed_lists'
    :rtype: dict
    """
    cami_params = {}
    # find homepath aka ~/cami
    current_wd = os.getcwd()
    current_wd_lst = current_wd.rsplit('/', 1)
    current_folder = current_wd_lst[-1]
    while current_folder != 'cami':
        # BUGFIX: guard against reaching the filesystem root, where the old
        # code crashed with an obscure os.chdir('') error (or looped forever).
        if current_wd_lst[0] in ('', current_wd):
            raise FileNotFoundError(
                "Could not locate a 'cami' directory above the current working directory.")
        os.chdir(current_wd_lst[0])
        current_wd = os.getcwd()
        current_wd_lst = current_wd.rsplit('/', 1)
        current_folder = current_wd_lst[-1]
    home_path = current_wd
    cami_params['home_path'] = home_path
    cami_source = os.path.join(home_path, 'cami_src')
    cami_params['cami_src_path'] = cami_source
    # initialize tool wrappers
    diamond = DiamondWrapper()
    domino = DominoWrapper()
    robust = RobustWrapper()
    wrappers = [diamond, domino, robust]
    nof_tools = len(wrappers)
    cami_params['tool_wrappers'] = wrappers
    # preprocessing
    if path_to_ppi_file == '':
        ppi_network = 'example_network.tsv'
        ppi_file = os.path.join(home_path, f'data/input/networks/{ppi_network}')
    else:
        ppi_file = path_to_ppi_file
    symbol_columns = []  # if the two symbol columns in the ppi_network file are not named
    # ['Official_Symbol_Interactor_A', 'Official_Symbol_Interactor_B']
    # provide the names here.
    ppi_graph = preprocess.csv2graph(ppi_file, symbol_columns, nof_tools)
    cami_params['ppi_graph'] = ppi_graph
    # dictionary with name of the seeds and file
    seed_directory = os.path.join(home_path, 'data/input/seeds')
    seed_files = {'adhd': 'adhd.tsv',
                  'alcl': 'alcl.tsv',
                  'joubert': 'joubert_syndrome.tsv'}
    seed_paths = {}
    for seed_file in seed_files:
        seed_paths[seed_file] = os.path.join(seed_directory, seed_files[seed_file])
    seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths}
    # BUGFIX: the collected parameters (and the seed lists) were previously
    # computed but never returned, making the whole function dead work.
    cami_params['seed_lists'] = seed_lists
    return cami_params
class cami():
    """ A module that is used for Active Module identification based on a
    consensus approach combining the predictions of several tools.
    """
    def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', seed_score=10, parallelization=False):
        """Instance variables of CAMI
        :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
        :type ppi_graph: Graph()
        :param seed_lst: A list of vertices that are the seeds for the predictions
        :type seed_lst: Graph().vertex()
        :param tool_wrappers: A list of AlgorithmWrappers() that correspond to the tools used for the predictions
        :type tool_wrappers: list(AlgorithmWrapper())
        :param output_dir: The path to the directory where the results are supposed to be saved
        :type output_dir: str
        :param uid: Identifier for the current excecution of CAMI
        :type uid: str
        :param tmp_dir: Directory where temporary files should be saved
        :type tmp_dir: str
        :param home_path: Path to the cami home directory (gitlab repository)
        :type home_path: str
        :param config: Name/path of the configuration file read with ConfigParser
        :type config: str
        :param seed_score: Fallback weight for seed genes when the config file
            does not provide a 'seed_score' option
        :type seed_score: float
        :param parallelization: If True the tools run in parallel threads
        :type parallelization: bool
        """
        self.ppi_graph = ppi_graph
        self.origin_ppi_graph = ppi_graph.copy()
        self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"]
        self.ppi_gene2vertex = {self.ppi_vertex2gene[vertex]:vertex for vertex in self.ppi_graph.vertices()}
        self.initial_seed_lst = None
        self.seed_lst = seed_lst
        self.origin_seed_lst = seed_lst.copy()
        self.tool_wrappers = tool_wrappers
        self.output_dir = output_dir
        self.home_path = home_path
        self.uid = str(uid)
        if tmp_dir == '':
            tmp_dir = os.path.join(home_path, 'data', 'tmp', self.uid)
        self.tmp_dir = tmp_dir
        self.nof_tools = len(tool_wrappers)
        self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices)
        self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers}
        self.code2toolname[0] = 'CAMI'
        self.cami_vertices = []
        self.ncbi = False
        # BUGFIX: the 'config' parameter used to be shadowed by the new
        # ConfigParser instance, so the hard-coded file 'camiconf' was always
        # read and the parameter was silently ignored. Read the configured file
        # (default value is still 'camiconf', so default behavior is unchanged).
        config_parser = ConfigParser()
        config_parser.read(config)
        # BUGFIX: ConfigParser.get() returns a string; the cami_score property
        # map is numeric (remove_seeds assigns 0.0 to it), so convert to float.
        # The previously unused 'seed_score' parameter now serves as fallback.
        self.seed_score = float(config_parser.get('cami', 'seed_score', fallback=str(seed_score)))
        self.config = config_parser
        self.threaded = parallelization
        # set weights for seed genes in ppi_graph
        for seed in self.seed_lst:
            self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score

    def reset_cami(self, new_uid='', change_tmp=False):
        """Reset graph, seeds and result sets to their initial state.

        :param new_uid: optional new identifier for the next execution
        :type new_uid: str
        :param change_tmp: if True, create and switch to a new tmp directory
        :type change_tmp: bool
        """
        if not new_uid == '':
            self.uid = new_uid
        if change_tmp:
            # NOTE(review): __init__ uses 'data/tmp/<uid>' while this uses
            # 'data/<uid>' -- possibly a missing 'tmp' component; confirm.
            new_tmp_dir = os.path.join(self.home_path,
                                       'data',
                                       self.uid)
            # exist_ok avoids a crash when a uid is reused
            os.makedirs(new_tmp_dir, exist_ok=True)
            self.tmp_dir = new_tmp_dir
        self.ppi_graph = self.origin_ppi_graph.copy()
        self.result_gene_sets = {}
        self.cami_vertices = []
        self.seed_lst = self.origin_seed_lst.copy()

    def set_initial_seed_lst(self, seedlst):
        """Remember the original (unfiltered) seed list for reporting."""
        self.initial_seed_lst = seedlst

    def initialize_tool(self, tool):
        """Hand network, seeds, paths and config over to one tool wrapper."""
        tool.set_ppi_network(self.ppi_graph)
        tool.set_seeds(self.seed_lst)
        tool.set_homepath(self.home_path)
        tool.set_id(self.uid)
        tool.set_config(self.config)

    def initialize_all_tools(self):
        """Initialize every registered tool wrapper."""
        for tool in self.tool_wrappers:
            self.initialize_tool(tool)

    def run_tool(self, tool):
        """Excecute the predictions using the AlgorithmWrapper() of a tool
        :param tool: A tool that has the following methods: prepare_input, run_tool() and extract_output()
        :type tool: AlgorithmWrapper()
        :return: A set of predicted vertices by the used tool
        :rtype: set()
        """
        tool.create_tmp_output_dir(self.tmp_dir) # creates the temporary input directory
        print(f"preparing {tool.name} input...")
        inputparams = tool.prepare_input()
        print(f'running {tool.name}...')
        preds = set(tool.run_algorithm(inputparams))
        print(f'{tool.name} predicted {len(preds)} active vertices (seeds not excluded):')
        print(preds)
        return preds

    def make_evaluation(self):
        """Validate every result set against the seeds with biodigest and
        save plots/results into the output directory."""
        print(self.result_gene_sets)
        biodigest.setup.main(setup_type="api")
        for result_set in self.result_gene_sets:
            # NOTE(review): 'tar' holds gene ids while 'ref' holds graph
            # vertex objects -- verify both are entrez ids as declared.
            validation_results = biodigest.single_validation.single_validation(
                tar=set(self.result_gene_sets[result_set]),
                tar_id='entrez',
                mode='set-set',
                distance='jaccard',
                ref=set(self.seed_lst),
                ref_id='entrez')
            if validation_results['status'] == 'ok':
                biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir)
                biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results,
                                                                         mode='set-set',
                                                                         tar=set(self.result_gene_sets[result_set]),
                                                                         tar_id='entrez',
                                                                         out_dir=self.output_dir,
                                                                         prefix=f'{result_set}_{self.uid}')

    def run_threaded_tool(self, tool, pred_sets):
        """run a tool in one thread and save the results into a dictionary pred_sets
        Args:
            tool (AlgorithmWrapper): Wrapper class for a tool
            pred_sets (dict): a dictionary that maps a tool to its result set
        """
        preds = self.run_tool(tool)
        pred_sets[tool] = preds #- seed_set

    def make_predictions(self) -> dict:
        """create all predictions using the tools specified in tool_wrappers
        :return: A dictionary that saves the predicted vertices with respect
        to the corresponding tool
        :rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
        """
        print(f'Creating result sets of all {self.nof_tools} tools...')
        pred_sets = {tool:None for tool in self.tool_wrappers}
        if self.threaded:
            threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,))
                       for tool in self.tool_wrappers]
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
        else:
            for tool in self.tool_wrappers:
                pred_sets[tool] = self.run_tool(tool)
        # every tool must have produced a result set
        assert(list(pred_sets.values()).count(None) < 1)
        # translate the raw vertex indices back into vertex objects
        result_sets = {tool:set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]])
                       for tool in pred_sets}
        return result_sets

    def take_custom_results(self, inputfiles, result_sets=None):
        """Takes a list of inputfiles and extracts the results from them to
        include them in the consensus with the tools of CAMI
        :param inputfiles: A list of dictionaries with the following properties:
                           key: The used tool name
                           values: the paths to result files of these tools
        :type inputfiles: list(dict)
        :return: A dictionary that saves the predicted vertices with respect
        to the corresponding tool
        :rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
        """
        # BUGFIX: the default used to be a mutable '{}' shared across calls,
        # so repeated invocations silently accumulated earlier results.
        if result_sets is None:
            result_sets = {}
        for tool in inputfiles:
            result_list = []
            with open(inputfiles[tool]) as rfile:
                for idx, line in enumerate(rfile):
                    if idx == 0:
                        # first line of the file names the tool
                        tool.name = line.strip()
                        self.code2toolname[tool.code] = tool.name
                    else:
                        node = line.strip()
                        if node in self.ppi_gene2vertex:
                            result_list.append(self.ppi_gene2vertex[node])
                result_sets[tool] = set(result_list)
        return result_sets

    def create_consensus(self, result_sets):
        """takes a set of active module predictions and creates a consensus
        that combines all the results of the different tools.
        :param result_sets: A dictionary with the following properties:
                            key: The used tool as AlgorithmWrapper() Object
                            values: Set of vertices in the ppi_graph that were
                                    predicted by the key-tool
        :type result_sets: {AlgorithmWrapper(): {Graph().vertex()}}
        """
        # calculate gene weights
        # set of all result genes
        cami_scores = self.ppi_graph.vertex_properties["cami_score"]
        predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
        consens_threshold = min(self.nof_tools, 2)
        ppi_graph = self.ppi_graph
        seed_list = self.seed_lst
        tool_name_map = self.code2toolname
        gene_name_map = self.ppi_vertex2gene
        # every consensus variant to run: function plus its parameter set
        camis = {
            'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
            'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
            }},
            'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
            }},
            'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
            }},
            'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
            }},
            'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'ranking': 'must',
            }},
            'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
            }},
            'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
            }},
            'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
            }},
            'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
            }},
            'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'trustrank'
            }},
            'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
            }},
            'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must'
            }},
            'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15
            }},
        }
        for cami_method_name, cami_params in camis.items():
            print("Running " + cami_method_name)
            cami_vertices, putative_vertices, codes2tools = cami_params['function'](result_sets, ppi_graph, seed_list,
                                                                                    predicted_by, cami_scores,
                                                                                    tool_name_map,
                                                                                    cami_params['params'])
            # sort the resulting vertices according to their cami_score
            cami_vlist = sorted(cami_vertices, key=lambda v: cami_scores[v], reverse=True)
            seed_genes = [self.ppi_vertex2gene[seed_vertex] for seed_vertex in seed_list]
            # translate the resulting vertex() ids to the corresponding names in the ppi network
            cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vlist]
            print(f'With the given seed genes: {seed_genes} \n' +
                  f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):')
            for vertex in cami_vlist:
                print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
            # for visualization
            self.result_gene_sets[cami_method_name] = cami_genes
            if cami_method_name == 'cami_v1':
                # for drugstone
                self.cami_vertices = cami_vlist
            # save the results in outputfiles
            self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                                 gene_name_map, codes2tools, result_sets, cami_scores)

    def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                        gene_name_map, codes2tools, result_sets, cami_scores):
        """Write all prediction, module and per-tool output files for one
        consensus method and return the resulting module.

        :return: dict with keys 'module' (seed + cami genes) and 'seeds'
        :rtype: dict
        """
        # save all predictions by all tools
        print('Saving the results...')
        # BUGFIX: initial_seed_lst is None until set_initial_seed_lst() is
        # called; fall back to the current seed list instead of crashing.
        initial_seeds = self.initial_seed_lst if self.initial_seed_lst is not None else self.seed_lst
        with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
            outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(initial_seeds)} seeds: {seed_genes},\n'+
                             f'initially: {initial_seeds}\n')
            outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n')
            all_vertices = cami_vertices.union(putative_vertices)
            for vertex in all_vertices:
                outputfile.write(f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
        print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
        # save the predictions made by cami
        ncbi_url = ('\tncbi_url' if self.ncbi else '')
        ncbi_summary = ('\tncbi_summary' if self.ncbi else '')
        with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile:
            outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n')
            for vertex in cami_vlist:
                if self.ncbi:
                    url, summary = ncbi.send_request(gene_name_map[vertex])
                    url = '\t' + url
                    if summary is not None:
                        summary = '\t' + summary
                    else:
                        summary = ''
                else:
                    url, summary = '',''
                outputfile.write(f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
        # save the whole module (seeds followed by the predicted genes)
        whole_module = []
        with open(f'{self.output_dir}/CAMI_module_{cami_method}_{self.uid}.txt', 'w') as modfile:
            for vertex in seed_genes:
                modfile.write(f'{vertex}\n')
                whole_module.append(vertex)
            for vertex in cami_genes:
                modfile.write(f'{vertex}\n')
                whole_module.append(vertex)
        print(f'saved cami output in: {self.output_dir}/CAMI_output_{self.uid}.tsv')
        # BUGFIX: the message used to advertise 'CAMI_nodes_...' although the
        # file written above is 'CAMI_module_...'.
        print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/CAMI_module_{cami_method}_{self.uid}.txt')
        # transform all vertex indices to their corresponding gene names in a result set
        for tool in result_sets:
            self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]])
        # save predictions by the other tools
        for tool in self.result_gene_sets:
            with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile:
                outputfile.write('gene\n')
                for gene in self.result_gene_sets[tool]:
                    outputfile.write(f'{gene}\n')
            print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
        # for drugstone
        self.cami_vertices = cami_vlist
        # return values
        consensus = {}
        consensus['module'] = whole_module
        consensus['seeds'] = self.seed_lst
        # BUGFIX: the consensus dict was previously built but never returned
        return consensus

    def use_nvenn(self):
        """Create Venn Diagrams via a external tool named degradome.
        Sends a request via requests to the degradome server.
        Returns the URL of the result.
        """
        # visualize with degradome
        if self.nof_tools < 7:
            print('Visualizing results using Degradome...')
            degradome_sets = {tool:self.result_gene_sets[tool]
                              for tool in self.result_gene_sets
                              if len(self.result_gene_sets[tool])>0}
            url = degradome.send_request(degradome_sets, {self.ppi_vertex2gene[seed] for seed in self.seed_lst})
            with open(f'{self.output_dir}/venn_link_{self.uid}.txt', 'w') as f:
                f.write(url)
            return url
        else:
            # NOTE(review): the condition admits up to 6 tools but the message
            # says '6 or more' -- confirm which one reflects the intent.
            print('Cannot use degradome to create venn diagrams of 6 or more tools')

    def download_diagram(self, url):
        """Download the rendered Venn diagram (PNG + HTML) for a nvenn URL."""
        venn_name = f'{self.output_dir}/vdiagram_{self.uid}'
        response = degradome.download_image(url, venn_name + '.png')
        if response is not None:
            with open(f'{venn_name}.html', 'w') as r:
                r.write(response.html.html)

    def use_drugstone(self):
        """Send the CAMI module (seeds + predicted vertices and their edges)
        to drugstone and return (and save) the visualization URL."""
        symbol = self.ppi_graph.vertex_properties["symbol"]
        cami_module = self.cami_vertices + self.seed_lst
        cami_symbols = [symbol[vertex] for vertex in cami_module]
        # BUGFIX: removed a stray 'cami_symbols.append' attribute access that
        # had no effect.
        cami_symbol_edges = []
        for vertex in self.cami_vertices:
            for edge in vertex.all_edges():
                cami_symbol_edges.append((symbol[edge.source()], symbol[edge.target()]))
        url = drugstone.send_request(cami_symbols, cami_symbol_edges)
        print(f'You can find a network visualization of the CAMI module via: {url}')
        print('The link was also saved in the outputfolder for later.')
        with open(f'{self.output_dir}/drugstone_link_{self.uid}.txt', 'w') as f:
            f.write(url)
        return url

    def remove_seeds(self, idx_lst):
        """remove seeds at indices idx
        Args:
            idx_lst (lst): list of indices to be removed
        Returns:
            list: the removed seed vertices
        """
        removed_seeds = [self.seed_lst[idx] for idx in idx_lst]
        self.seed_lst = [seed for seed in self.seed_lst if seed not in removed_seeds]
        # re-apply the seed weight to the remaining seeds ...
        for seed in self.seed_lst:
            self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score
        # ... and clear the weight of the removed ones
        for seed in removed_seeds:
            self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0
        return removed_seeds