Commit fe7bf945 authored by Le, Mia

incorporation of external results implemented, HOTNET still in debugging phase

parent 8738ad38
......@@ -15,6 +15,7 @@ class AlgorithmWrapper(object):
self.seeds = list()
self.home_path = ''
self.config = 'camiconf'
self.code = 99
def set_weight(self, weight):
self.weight = weight
......
......@@ -29,17 +29,18 @@ class DiamondWrapper(AlgorithmWrapper):
# path to diamond
diamond_path = os.path.join(self.home_path, 'tools/DIAMOnD/')
diamond = f'cd {diamond_path}; python DIAMOnD.py'
diamond = f'cd "{diamond_path}"; python DIAMOnD.py'
ppi = inputparams[0] # path to ppi inputfile
seeds = inputparams[1] # path to seed inputfile
nof_predictions = inputparams[2] # how many active genes should be predicted
out_filename = self.name_file('out') # name the outputfile
algo_output = os.join(self.output_dir, out_filename) # specify the output location
algo_output = os.path.join(self.output_dir, out_filename) # specify the output location
#MC:
#CONFIG alpha = 1
command = f'{diamond} {ppi} {seeds} {nof_predictions} {self.alpha} {algo_output}'
command = f'{diamond} "{ppi}" "{seeds}" {nof_predictions} {self.alpha} "{algo_output}"'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
assert os.path.exists(algo_output), f'DIAMOnD failed to save output to {algo_output}'
print(f"DIAMOnD results saved in {algo_output}")
return self.extract_output(algo_output)
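The quoted command strings above guard against spaces in paths. As a shell-free alternative, a minimal sketch (assuming the same diamond_path, ppi, seeds, nof_predictions and algo_output variables as in run_algorithm above; not the committed implementation):

import subprocess
# Pass an argument list and set cwd instead of prefixing the command with 'cd'.
result = subprocess.run(
    ['python', 'DIAMOnD.py', ppi, seeds, str(nof_predictions), str(self.alpha), algo_output],
    cwd=diamond_path, capture_output=True, text=True)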
......@@ -61,6 +62,7 @@ class DiamondWrapper(AlgorithmWrapper):
for edge in self.ppi_network.edges():
file.write(f"{str(edge.source())},{str(edge.target())}\n")
inputparams.append(ppi_file)
assert os.path.exists(ppi_file), f'Could not create PPI-network file "{ppi_file}"'
print(f'{self.name} ppi is saved in {ppi_file}')
# create seed file
......@@ -68,6 +70,7 @@ class DiamondWrapper(AlgorithmWrapper):
with open(seed_file, "w") as file:
for seed in self.seeds:
file.write(f"{seed}\n")
assert os.path.exists(seed_file), f'Could not create seed file "{seed_file}"'
print(f'{self.name} seeds are saved in {seed_file}')
inputparams.append(seed_file)
......@@ -85,7 +88,7 @@ class DiamondWrapper(AlgorithmWrapper):
:param algo_output: path to outputfile
:type algo_output: str
:return: list of indices
:rtype: list(int)
"""
nodes = []
......
......@@ -45,6 +45,7 @@ class DominoWrapper(AlgorithmWrapper):
-o "{self.output_dir}" -v {self.visualization_flag} -p {self.parallels} --use_cache {self.c}'
run = subprocess.run(command, shell=True, capture_output=True)
match = re.search("( final modules are reported at )(.*)(\n)", run.stdout.decode('utf-8'))
assert match is not None, 'DOMINO could not find any modules'
algo_output = match.group(2)
#MC:
#CONFIG output_name = 'modules.out'
......
......@@ -5,6 +5,8 @@
import subprocess, os
from sys import stdout
from AlgorithmWrapper import AlgorithmWrapper
import multiprocessing as mp
import itertools as itt
class HHotNetWrapper(AlgorithmWrapper):
......@@ -13,6 +15,19 @@ class HHotNetWrapper(AlgorithmWrapper):
self.name = 'HHotNet'
self.code = 4
def permute_scores(self, scores_file, path_bins, path_permuted_scores):
hotnet_path = os.path.join(self.home_path, 'tools/HHotNet/src')
hotnet = f'cd {hotnet_path}; python permute_scores.py'
command = f'{hotnet} -i {scores_file} -bf {path_bins} -o {path_permuted_scores}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
def construct_hierarchy(self, path_sim_matrix, map_file, scores_file, path_h_ggi, path_h_map):
hotnet_path = os.path.join(self.home_path, 'tools/HHotNet/src')
hotnet = f'cd {hotnet_path}; python construct_hierarchy.py'
command = f'{hotnet} -smf {path_sim_matrix} -igf {map_file} -gsf {scores_file} -helf {path_h_ggi} -higf {path_h_map}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
def run_algorithm(self, inputparams):
"""Run Hierachical HotNet algorithm
......@@ -22,136 +37,99 @@ class HHotNetWrapper(AlgorithmWrapper):
:return: list of predicted nodes
:rtype: list(str)
"""
ppi, maps, scores = inputparams[0], inputparams[1], inputparams[2]
hotnet_path = os.path.join(self.home_path, 'tools/HHotNet/src')
# Compile Fortran module and set number of cores.
command = f'cd {hotnet_path}; f2py -c fortran_module.f90 -m fortran_module > /dev/null' # Fortran source/module names assumed per the HHotNet repository
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
num_cores = 34
# Construct similarity matrix.
print('Construct similarity matrix.')
hotnet = f'cd {hotnet_path}; python construct_similarity_matrix.py'
path_sim_matrix = os.path.join(self.output_dir, self.name_file('sim_matrix', 'h5'))
command = f'{hotnet} -i {ppi} -o {path_sim_matrix}'
subprocess.run(command, shell=True, capture_output=True)
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
# Find permutation bins.
hotnet = f'cd {hotnet_path}; python find_permutation_bins.py'
path_bins = os.path.join(self.output_dir, self.name_file('bins', 'tsv'))
command = f'{hotnet} -elf {ppi} -igf {maps} -gsf {scores} -o {path_bins}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
# Permute the scores in parallel.
print('Permute the scores in parallel.')
pool = mp.Pool(num_cores)
paths_permuted_scores = [os.path.join(self.output_dir, self.name_file(f'scores_{i}', 'tsv')) for i in range(100)]
args = list(itt.product([scores], [path_bins], paths_permuted_scores))
pool.starmap(self.permute_scores, args)
# Construct the hierarchies in parallel.
print('Construct the hierarchies in parallel.')
path_h_ppi = os.path.join(self.output_dir, self.name_file('h_ppi', 'tsv'))
path_h_map = os.path.join(self.output_dir, self.name_file('hotnet_h_map', 'tsv'))
paths_permuted_h_ppis = [os.path.join(self.output_dir, self.name_file(f'h_ppi_{i}', 'tsv')) for i in range(100)]
paths_permuted_h_maps = [os.path.join(self.output_dir, self.name_file(f'h_map_{i}', 'tsv')) for i in range(100)]
args = [(path_sim_matrix, maps, paths_permuted_scores[i], paths_permuted_h_ppis[i], paths_permuted_h_maps[i])
for i in range(100)]
args.append((path_sim_matrix, maps, scores, path_h_ppi, path_h_map))
pool.starmap(self.construct_hierarchy, args)
# Process the hierarchies.
print('Process the hierarchies.')
hotnet = f'cd {hotnet_path}; python process_hierarchies.py'
algo_output = os.path.join(self.output_dir, self.name_file('results', 'tsv'))
pelf_str = ' '.join(paths_permuted_h_ppis)
pigf_str = ' '.join(paths_permuted_h_maps)
command = f'{hotnet} -oelf {path_h_ppi} -oigf {path_h_map} -pelf {pelf_str} -pigf {pigf_str} -cf {algo_output} -nc {num_cores}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
path = self.home_path
output_dir = self.output_dir
sim_matrix = f'cd {path}/tools/HHotNet/src; \
python construct_similarity_matrix.py \
-i {output_dir}/edge_list_HHotNet.tsv\
-o {output_dir}/similarity_matrix.h5'
#TODO: if necessary, networks can be permuted at this point. If more scores
# are used, loops are necessary. Kept very simple for now, just to see if it works
permute_scores = f'cd {path}/tools/HHotNet/src; \
python find_permutation_bins.py \
-gsf {output_dir}/heat_scores_HHotNet.tsv \
-igf {output_dir}/index_gene_HHotNet.tsv \
-elf {output_dir}/edge_list_HHotNet.tsv \
-o {output_dir}/permuted_scores.tsv'
create_hierarchies = f'cd {path}/tools/HHotNet/src; \
python construct_hierarchy.py \
-smf {output_dir}/similarity_matrix.h5 \
-igf {output_dir}/index_gene_HHotNet.tsv \
-gsf {output_dir}/heat_scores_HHotNet.tsv \
-helf {output_dir}/hierarchy_edge_list.tsv \
-higf {output_dir}/hierarchy_index_gene.tsv'
process_hierarchies = f'cd {path}/tools/HHotNet/src; \
python process_hierarchies.py \
-oelf {output_dir}/edge_list_HHotNet.tsv \
-oigf {output_dir}/index_gene_HHotNet.tsv \
-pelf {output_dir}/hierarchy_edge_list.tsv \
-pigf {output_dir}/hierarchy_index_gene.tsv\
-cf {output_dir}/clusters.tsv \
-pl network score \
-pf {output_dir}/heat_plot.pdf'
subprocess.call(sim_matrix, shell=True)
subprocess.call(permute_scores, shell=True)
subprocess.call(create_hierarchies,shell=True)
subprocess.call(process_hierarchies, shell=True)
algo_output = f'{output_dir}/clusters.tsv'
print(f'HHotNet results saved in {algo_output}')
return self.extract_output(algo_output)
def prepare_input(self):
'''
index_to_gene = {1: 'a', 2: 'b', 3: 'c', ...}
edge_list = [(1, 2), (1, 3), (2, 3), ...]
heat_scores = {1: 0, 2: 0, 3: 1, ...}
'''
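# Illustration of the three inputs above (hypothetical values): for a triangle
# network a-b-c in which c is the only seed,
#   index_to_gene = {1: 'a', 2: 'b', 3: 'c'}
#   edge_list     = [(1, 2), (1, 3), (2, 3)]
#   heat_scores   = {1: 0, 2: 0, 3: 1}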
path_to_dir = self.output_dir
ppi_network = self.ppi_network
seed_list = self.seeds
# The heat scores are 1 for every vertex included in the seed list and 0 for
# the rest of the vertices in the ppi_network. The heat scores shall be
# labeled with the gene name instead of the index.
seeds = dict.fromkeys(seed_list, "1")
index_gene = []
heat_scores = {}
for k in seeds:
heat_scores[int(k)] = seeds[k]
ppi2hotnet_id_dict = {}
for i,vertex in enumerate(ppi_network.iter_vertices()): # TODO: change to gene names str()
ppi2hotnet_id_dict[int(vertex)] = i # we do not need the gene names, so simply indices,
# ML: just to be safe we should still translate the 'ppi-ids' to the 'hotnet-ids'
if int(vertex) not in seeds:
heat_scores[int(vertex)] = 0
# iter_edges: Return an iterator over the (source, target) edge pairs and an
# optional list of edge properties eprops. For the edge_list we do not need
# the properties, so the extra values can be ignored
edge_list = [] # edge list with indices instead of gene names
for s,t in ppi_network.iter_edges():
edge_list.append((ppi2hotnet_id_dict[s],ppi2hotnet_id_dict[t]))
# iter_vertices: Return an iterator over the vertices. With vprops you can
# specify optional vertex properties
# save the prepared input files
#heat_scores
heat_filename = "heat_scores_HHotNet.tsv"
file_path1 = os.path.join(path_to_dir, heat_filename)
if not os.path.isdir(path_to_dir):
os.mkdir(path_to_dir)
heats = open(file_path1, 'w')
for gene in heat_scores:
heats.write(f'{gene}\t{heat_scores[gene]}\n')
heats.close()
#index_gene
index_filename = "index_gene_HHotNet.tsv"
file_path2 = os.path.join(path_to_dir, index_filename)
if not os.path.isdir(path_to_dir):
os.mkdir(path_to_dir)
indices = open(file_path2, 'w')
for ppi_id, hotnet_index in sorted(ppi2hotnet_id_dict.items(), key=lambda item: item[1]):
indices.write(f'{hotnet_index}\t{ppi_id}\n')
indices.close()
#edge list
edges_filename = "edge_list_HHotNet.tsv"
file_path3 = os.path.join(path_to_dir, edges_filename)
if not os.path.isdir(path_to_dir):
os.mkdir(path_to_dir)
edges = open(file_path3, 'w')
for edge in edge_list:
edges.write(f'{edge[0]}\t{edge[1]}\n')
edges.close()
return
#index2gene = self.ppi_network.vertex_properties["name"]
index2score = self.ppi_network.vertex_properties["cami_score"]
ppi_filename = self.name_file('ppi', 'tsv')
ppi_path = os.path.join(self.output_dir, ppi_filename)
map_filename = self.name_file('map', 'tsv')
map_path = os.path.join(self.output_dir, map_filename)
scores_filename = self.name_file('scores', 'tsv')
scores_path = os.path.join(self.output_dir, scores_filename)
with open(ppi_path, 'w') as ppi_file, open(map_path, 'w') as map_file, open(scores_path, 'w') as scores_file:
first_non_isolated_node = -1
last_non_isolated_node = -1
for vertex in self.ppi_network.vertices():
if vertex.out_degree() > 0:
last_non_isolated_node = int(vertex)
if first_non_isolated_node == -1:
first_non_isolated_node = int(vertex)
for node in range(first_non_isolated_node, last_non_isolated_node + 1):
gene_index = node - first_non_isolated_node + 1
gene_id = node
map_file.write(f'{gene_index}\t{gene_id}\n')
scores_file.write(f'{gene_index}\t{index2score[node]}\n')
for u, v in self.ppi_network.edges():
ppi_file.write(f'{int(u) - first_non_isolated_node + 1}\t{int(v) - first_non_isolated_node + 1}\n')
inputparams = [ppi_path, map_path, scores_path]
return inputparams
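# Example of the reindexing above (hypothetical vertex ids): if the first
# non-isolated vertex is 3 and the last is 7, gene_index runs from 1 to 5,
# and an edge (3, 5) of the ppi_network is written as '1\t3' to the ppi file.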
# path_ggi = f'../temp/network_1_edge_list.tsv'
# map_file = f'../temp/network_1_index_gene.tsv'
# scores_file = f'../temp/scores_1.tsv'
def extract_output(self, algo_output):
nodes = []
with open(algo_output, "r") as output:
for line in output.readlines()[1:]:
nodes.append(line.split("\t")[1])
with open(algo_output, 'r') as hotnet_results:
for line in hotnet_results:
if not line.startswith('#'):
nodes = line.strip().split('\t')
break
return nodes # predicted nodes, indices!
......
......@@ -42,8 +42,8 @@ class RobustWrapper(AlgorithmWrapper):
# [5] reduction factor
# [6] number of steiner trees to be computed
# [7] threshold
robust_path = os.join(self.home_path, 'tools/robust')
robust = f'cd {robust_path}; python robust.py'
robust_path = os.path.join(self.home_path, 'tools/robust')
robust = f'cd "{robust_path}"; python robust.py'
ppi = inputparams[0]
seeds = inputparams[1]
......@@ -54,7 +54,7 @@ class RobustWrapper(AlgorithmWrapper):
#0.25 0.9 30 0.1
#MC:
#CONFIG according to robust documentation https://github.com/bionetslab/robust
command = f'{robust} {ppi} {seeds} {algo_output} \
command = f'{robust} "{ppi}" "{seeds}" "{algo_output}" \
{self.initialFraction} {self.reductionFactor} \
{self.numberSteinerTrees} {self.threshold}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
......
......@@ -3,11 +3,11 @@ from cProfile import label
import os
import sys
import shutil
from turtle import color
from DiamondWrapper import DiamondWrapper
from DominoWrapper import DominoWrapper
from RobustWrapper import RobustWrapper
from HHotNetWrapper import HHotNetWrapper
from AlgorithmWrapper import AlgorithmWrapper
import preprocess
import uuid
import cami_suite
......@@ -22,18 +22,36 @@ from configparser import ConfigParser
import ast
def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
output_dir, identifier, save_temps, nvenn, save_image, force, drugstone, ncbi, configuration, seed_variation, parallelization):
output_dir, identifier, save_temps, nvenn, save_image, force, drugstone, ncbi, configuration,
seed_variation, parallelization, external_results):
print('CAMI started')
config = ConfigParser()
config.read(configuration)
seed_score = config.get('cami', 'seed_score')
nof_tools = len(tools) + len(external_results)
nof_tools = len(tools)
ppi_graph = preprocess.csv2graph(ppi_network, symbol_columns=drugstone, nof_tools=nof_tools)
wrappers = {'diamond': DiamondWrapper(),
'domino': DominoWrapper(),
'robust': RobustWrapper(),
'hotnet': HHotNetWrapper()
}
tool_wrappers = [wrappers[tool] for tool in tools]
nof_cami_tools = len(wrappers)
seed_lst = preprocess.txt2lst(seeds)
if tool_weights is None:
tool_weights = nof_tools * [1]
ext_wrappers = []
if len(external_results) > 0:
for idx in range(len(external_results)):
dummy_wrapper = AlgorithmWrapper()
dummy_wrapper.code = idx + 1 + nof_cami_tools
ext_wrappers.append(dummy_wrapper)
ppi_graph = preprocess.csv2graph(ppi_network, symbol_columns=drugstone, nof_tools=nof_tools)
seed_lst = preprocess.txt2lst(seeds)
initial_seedlst = seed_lst.copy()
# rename seeds with their corresponding indices in the ppi_graph
......@@ -91,17 +109,6 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
print('Abort. Please try again.')
exit(1)
wrappers = {'diamond': DiamondWrapper(),
'domino': DominoWrapper(),
'robust': RobustWrapper(),
'hotnet': HHotNetWrapper()
}
tool_wrappers = [wrappers[tool] for tool in tools]
for idx, tool in enumerate(tool_wrappers):
tool.set_weight(float(tool_weights[idx]))
cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path, configuration, seed_score, parallelization)
if ncbi:
......@@ -110,10 +117,23 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
for tool in tool_wrappers:
cami.initialize_tool(tool)
for tool in ext_wrappers:
cami.initialize_tool(tool)
for idx, tool in enumerate(tool_wrappers):
tool.set_weight(float(tool_weights[idx]))
cami.set_initial_seed_lst(initial_seedlst)
if consensus or (not consensus and not evaluate and not seed_variation):
result_sets = cami.make_predictions()
if len(external_results) > 0:
assert len(ext_wrappers) == len(external_results)
external_input = {}
for i in range(len(ext_wrappers)):
external_input[ext_wrappers[i]] = external_results[i]
result_sets = cami.take_custom_results(external_input, result_sets)
print(result_sets)
cami.create_consensus(result_sets)
if nvenn or save_image:
......@@ -386,6 +406,8 @@ if __name__ == "__main__":
parser.add_argument('-t', '--tools', nargs='+', action='store', default=["diamond", "domino", "robust"],
help="List of tools that the user wants to use for prediction.\n Available tools are:\n" + \
"domino\ndiamond\nrobust\hotnet\nThe default tools are: diamond, domino and robust.")
parser.add_argument('-ext', '--external_results', nargs='+', action='store', default=[],
help="List of paths to results generated by external tools. The files should contain the tool name in the first line and then the predicted genes (1 gene per line)")
parser.add_argument('-w', '--tool_weights', nargs='+', action='store', default=None,
help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1")
parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami")
......
......@@ -152,18 +152,30 @@ class cami():
for tool in pred_sets}
return result_sets
def take_custom_results(self, inputfiles):
def take_custom_results(self, inputfiles, result_sets={}):
"""Takes a list of inputfiles and extracts the results from them to
include them in the consensus with the tools of CAMI
:param inputfiles: [description]
:type inputfiles: [type]
:param inputfiles: A dictionary mapping each tool wrapper to the path
of that tool's result file
:type inputfiles: dict(AlgorithmWrapper: str)
:return: A dictionary that saves the predicted vertices with respect
to the corresponding tool
:rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
"""
result_sets = {}
print('Not implemented yet.')
for tool in inputfiles:
result_list = []
with open(inputfiles[tool]) as rfile:
for idx, line in enumerate(rfile):
if idx == 0:
tool.name = line.strip()
self.code2toolname[tool.code] = tool.name
else:
node = line.strip()
if node in self.ppi_gene2vertex:
result_list.append(self.ppi_gene2vertex[node])
result_sets[tool] = set(result_list)
return result_sets
def create_consensus(self, result_sets):
......@@ -186,6 +198,7 @@ class cami():
#parse every result set of each tool
for tool in result_sets:
print(f'{tool.name}: {tool.weight}')
result_sets[tool] -= set(self.seed_lst)
# TODO: Should we keep the seeds in the result sets?
# every time a tool predicts a gene, add 1 * the tool weight to the gene's weight and add it to the result genes
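# Worked example of this weighting (hypothetical tools and weights): if
# diamond (weight 2.0) and robust (weight 1.0) both predict gene g while
# domino (weight 1.0) does not, g ends up with a consensus score of 3.0.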
......
......@@ -79,7 +79,7 @@ def name2index_dict(g):
index2name = g.vertex_properties["name"]
return {index2name[v]:v for v in g.vertices()}
# TODO: review this function, where is it used?
def compare_seeds_and_network(ppi_network, seeds):
seeds_in_network = []
seeds_not_in_network = []
......
......@@ -42,6 +42,7 @@ def run(args):
edge_list = load_edge_list(args.edge_list_file)
# Construct adjacency matrix.
print('construct adjacency matrix')
k = min(min(edge[:2]) for edge in edge_list)
l = max(max(edge[:2]) for edge in edge_list)
......@@ -54,6 +55,7 @@ def run(args):
A[i-k, j-k] = A[j-k, i-k] = weight
# Choose beta.
print('Choose beta')
if args.beta is None:
beta = balanced_beta(A, args.threshold, args.num_digits)
elif 0<args.beta<1:
......@@ -62,9 +64,11 @@ def run(args):
raise ValueError('{} invalid; beta must satisfy 0 < beta < 1.'.format(args.beta))
# Construct Hierarchical HotNet similarity matrix.
print('Construct similarity matrix')
P = hh_similarity_matrix(A, beta)
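# For reference: the similarity matrix computed here is the random-walk-with-
# restart matrix of the Hierarchical HotNet paper, P = beta * (I - (1 - beta) * W)^(-1),
# where W is the degree-normalized adjacency matrix derived from A.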
# Save results.
# Save results
print('Save results.')
if args.output_file is not None:
save_matrix(args.output_file, P, args.name)
......