# NOTE(review): the following lines are GitLab page chrome that was pasted
# into the source file; commented out so the module remains valid Python.
# Select Git revision
# api_python.ipynb
# Code owners
# Assign users and groups as approvers for specific file changes. Learn more.
# HHotNetWrapper.py 6.51 KiB
#TODO: write (sh?) file for main running (HotNet has to be run manually through
# different steps/ programs) --> example.sh can be used, sources have to be
# changed.
import subprocess, os
from sys import stdout
from algorithms.AlgorithmWrapper import AlgorithmWrapper
import multiprocessing as mp
import itertools as itt
class HHotNetWrapper(AlgorithmWrapper):
    """Wrapper around the Hierarchical HotNet (HHotNet) tool.

    HHotNet has to be run as a sequence of separate scripts (similarity
    matrix, permutation bins, score permutation, hierarchy construction
    and processing); this wrapper drives them from
    ``tools/HHotNet/src`` via shell subprocess calls.
    """

    def __init__(self):
        super().__init__()
        self.name = 'HHotNet'
        self.code = 4

    def _hotnet_src(self):
        """Return the directory containing the HHotNet driver scripts."""
        return os.path.join(self.home_path, 'tools/HHotNet/src')

    def permute_scores(self, scores_file, path_bins, path_permuted_scores):
        """Permute gene scores within the given permutation bins.

        :param scores_file: path to the input gene-score file
        :param path_bins: path to the permutation-bin file
        :param path_permuted_scores: output path for the permuted scores
        """
        hotnet = f'cd {self._hotnet_src()}; python permute_scores.py'
        command = f'{hotnet} -i {scores_file} -bf {path_bins} -o {path_permuted_scores}'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)

    def construct_hierarchy(self, path_sim_matrix, map_file, scores_file, path_h_ggi, path_h_map):
        """Construct a hierarchy from a similarity matrix and gene scores.

        Bug fix: the original definition was missing ``self`` even though it
        is invoked as a bound method via ``pool.starmap`` and needs
        ``self.home_path`` — it would have raised at runtime.

        :param path_sim_matrix: path to the similarity matrix (.h5)
        :param map_file: path to the index -> gene map file
        :param scores_file: path to the gene-score file
        :param path_h_ggi: output path for the hierarchy edge list
        :param path_h_map: output path for the hierarchy index -> gene map
        """
        hotnet = f'cd {self._hotnet_src()}; python construct_hierarchy.py'
        command = (f'{hotnet} -smf {path_sim_matrix} -igf {map_file} '
                   f'-gsf {scores_file} -helf {path_h_ggi} -higf {path_h_map}')
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)

    def run_algorithm(self, inputparams):
        """Run the Hierarchical HotNet pipeline.

        :param inputparams: list of input parameters, in this case the paths
            to the ppi edge list, index->gene map, and scores file
            (as produced by :meth:`prepare_input`)
        :type inputparams: list(str)
        :return: list of predicted nodes
        :rtype: list(str)
        """
        ppi, maps, scores = inputparams[0], inputparams[1], inputparams[2]
        hotnet_path = self._hotnet_src()
        # Compile the Fortran helper module used by the HHotNet scripts.
        # NOTE(review): passing a .so both as -c source and -m module name
        # looks wrong — f2py normally compiles a .f90 source; confirm.
        command = f'cd {hotnet_path}; f2py -c fortran_module.cpython-37m-darwin.so -m fortran_module.cpython-37m-darwin.so > /dev/null'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        num_cores = 34  # worker processes for the parallel steps
        # Construct similarity matrix.
        print('Construct similarity matrix.')
        hotnet = f'cd {hotnet_path}; python construct_similarity_matrix.py'
        path_sim_matrix = os.path.join(self.output_dir, self.name_file('sim_matrix', 'h5'))
        command = f'{hotnet} -i {ppi} -o {path_sim_matrix}'
        # Bug fix: the original issued this command twice
        # (subprocess.run followed by subprocess.call); once is enough.
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        # Find permutation bins.
        hotnet = f'cd {hotnet_path}; python find_permutation_bins.py'
        path_bins = os.path.join(self.output_dir, self.name_file('bins', 'tsv'))
        command = f'{hotnet} -elf {ppi} -igf {maps} -gsf {scores} -o {path_bins}'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        # Output paths for the 100 permutations plus the observed data.
        paths_permuted_scores = [os.path.join(self.output_dir, self.name_file(f'scores_{i}', 'tsv'))
                                 for i in range(100)]
        path_h_ppi = os.path.join(self.output_dir, self.name_file('h_ppi', 'tsv'))
        path_h_map = os.path.join(self.output_dir, self.name_file('hotnet_h_map', 'tsv'))
        paths_permuted_h_ppis = [os.path.join(self.output_dir, self.name_file(f'h_ppi_{i}', 'tsv'))
                                 for i in range(100)]
        paths_permuted_h_maps = [os.path.join(self.output_dir, self.name_file(f'h_map_{i}', 'tsv'))
                                 for i in range(100)]
        # Use the pool as a context manager so workers are always cleaned up
        # (the original never closed/joined the pool).
        with mp.Pool(num_cores) as pool:
            # Permute the scores in parallel.
            print('Permute the scores in parallel.')
            args = list(itt.product([scores], [path_bins], paths_permuted_scores))
            pool.starmap(self.permute_scores, args)
            # Construct the hierarchies in parallel.
            print('Construct the hierarchies in parallel.')
            args = [(path_sim_matrix, maps, paths_permuted_scores[i],
                     paths_permuted_h_ppis[i], paths_permuted_h_maps[i])
                    for i in range(100)]
            args.append((path_sim_matrix, maps, scores, path_h_ppi, path_h_map))
            pool.starmap(self.construct_hierarchy, args)
        # Process the hierarchies.
        print('Process the hierarchies.')
        hotnet = f'cd {hotnet_path}; python process_hierarchies.py'
        algo_output = os.path.join(self.output_dir, self.name_file('results', 'tsv'))
        # Bug fix: the original referenced the undefined name
        # 'paths_permuted_h_ggis' here (NameError).
        pelf_str = ' '.join(paths_permuted_h_ppis)
        pigf_str = ' '.join(paths_permuted_h_maps)
        command = (f'{hotnet} -oelf {path_h_ppi} -oigf {path_h_map} -pelf {pelf_str} '
                   f'-pigf {pigf_str} -cf {algo_output} -nc {num_cores}')
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        print(f'HHotNet results saved in {algo_output}')
        return self.extract_output(algo_output)

    def prepare_input(self):
        """Write the PPI edge list, index->gene map and score files.

        Gene indices are remapped to start at 1 relative to the first
        non-isolated node, as expected by the HHotNet scripts.

        :return: [ppi_path, map_path, scores_path]
        :rtype: list(str)
        """
        index2score = self.ppi_network.vertex_properties["cami_score"]
        ppi_path = os.path.join(self.output_dir, self.name_file('ppi', 'tsv'))
        map_path = os.path.join(self.output_dir, self.name_file('map', 'tsv'))
        scores_path = os.path.join(self.output_dir, self.name_file('scores', 'tsv'))
        # Bug fix: the original reused the name 'scores_file' for both the
        # path and the open file handle, so a closed file object (not the
        # path string) was returned to the caller.
        with open(ppi_path, 'w') as ppi_file, open(map_path, 'w') as map_file, \
                open(scores_path, 'w') as scores_file:
            first_non_isolated_node = -1
            last_non_isolated_node = -1
            for vertex in self.ppi_network.vertices():
                if vertex.out_degree() > 0:
                    last_non_isolated_node = int(vertex)
                    if first_non_isolated_node == -1:
                        first_non_isolated_node = int(vertex)
            for node in range(first_non_isolated_node, last_non_isolated_node + 1):
                gene_index = node - first_non_isolated_node + 1
                gene_id = node
                map_file.write(f'{gene_index}\t{gene_id}\n')
                scores_file.write(f'{gene_index}\t{index2score[node]}\n')
            for u, v in self.ppi_network.edges():
                ppi_file.write(f'{int(u) - first_non_isolated_node + 1}\t{int(v) - first_non_isolated_node + 1}\n')
        return [ppi_path, map_path, scores_path]

    def extract_output(self, algo_output):
        """Parse the HHotNet results file.

        :param algo_output: path to the HHotNet results file
        :return: predicted nodes from the first non-comment line (indices!)
        :rtype: list(str)
        """
        nodes = []
        with open(algo_output, 'r') as hotnet_results:
            for line in hotnet_results:
                if not line.startswith('#'):
                    nodes = line.strip().split('\t')
                    break
        # TODO: does HotNet output genes or indices?
        return nodes