# NOTE(review): the following lines are GitLab page chrome that was pasted
# into the source file; commented out so the module remains valid Python.
# Select Git revision
# api_python.ipynb
# Code owners
# Assign users and groups as approvers for specific file changes. Learn more.
# HHotNetWrapper.py 6.51 KiB
#TODO: write (sh?) file for main running (HotNet has to be run manually through
# different steps/ programs) --> example.sh can be used, sources have to be
# changed.
import subprocess, os
from sys import stdout
from algorithms.AlgorithmWrapper import AlgorithmWrapper
import multiprocessing as mp
import itertools as itt
class HHotNetWrapper(AlgorithmWrapper):
    """Wrapper around the Hierarchical HotNet (HHotNet) tool.

    HHotNet has to be run as a sequence of separate scripts (similarity
    matrix, permutation bins, score permutation, hierarchy construction
    and processing); this wrapper drives them from
    ``tools/HHotNet/src`` via shell subprocess calls.
    """

    def __init__(self):
        super().__init__()
        self.name = 'HHotNet'
        self.code = 4

    def _hotnet_src(self):
        """Return the directory containing the HHotNet driver scripts."""
        return os.path.join(self.home_path, 'tools/HHotNet/src')

    def permute_scores(self, scores_file, path_bins, path_permuted_scores):
        """Permute gene scores within the given permutation bins.

        :param scores_file: path to the input gene-score file
        :param path_bins: path to the permutation-bin file
        :param path_permuted_scores: output path for the permuted scores
        """
        hotnet = f'cd {self._hotnet_src()}; python permute_scores.py'
        command = f'{hotnet} -i {scores_file} -bf {path_bins} -o {path_permuted_scores}'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)

    def construct_hierarchy(self, path_sim_matrix, map_file, scores_file, path_h_ggi, path_h_map):
        """Construct a hierarchy from a similarity matrix and gene scores.

        Bug fix: the original definition was missing ``self`` even though it
        is invoked as a bound method via ``pool.starmap`` and needs
        ``self.home_path`` — it would have raised at runtime.

        :param path_sim_matrix: path to the similarity matrix (.h5)
        :param map_file: path to the index -> gene map file
        :param scores_file: path to the gene-score file
        :param path_h_ggi: output path for the hierarchy edge list
        :param path_h_map: output path for the hierarchy index -> gene map
        """
        hotnet = f'cd {self._hotnet_src()}; python construct_hierarchy.py'
        command = (f'{hotnet} -smf {path_sim_matrix} -igf {map_file} '
                   f'-gsf {scores_file} -helf {path_h_ggi} -higf {path_h_map}')
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)

    def run_algorithm(self, inputparams):
        """Run the Hierarchical HotNet pipeline.

        :param inputparams: list of input parameters, in this case the paths
            to the ppi edge list, index->gene map, and scores file
            (as produced by :meth:`prepare_input`)
        :type inputparams: list(str)
        :return: list of predicted nodes
        :rtype: list(str)
        """
        ppi, maps, scores = inputparams[0], inputparams[1], inputparams[2]
        hotnet_path = self._hotnet_src()
        # Compile the Fortran helper module used by the HHotNet scripts.
        # NOTE(review): passing a .so both as -c source and -m module name
        # looks wrong — f2py normally compiles a .f90 source; confirm.
        command = f'cd {hotnet_path}; f2py -c fortran_module.cpython-37m-darwin.so -m fortran_module.cpython-37m-darwin.so > /dev/null'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        num_cores = 34  # worker processes for the parallel steps
        # Construct similarity matrix.
        print('Construct similarity matrix.')
        hotnet = f'cd {hotnet_path}; python construct_similarity_matrix.py'
        path_sim_matrix = os.path.join(self.output_dir, self.name_file('sim_matrix', 'h5'))
        command = f'{hotnet} -i {ppi} -o {path_sim_matrix}'
        # Bug fix: the original issued this command twice
        # (subprocess.run followed by subprocess.call); once is enough.
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        # Find permutation bins.
        hotnet = f'cd {hotnet_path}; python find_permutation_bins.py'
        path_bins = os.path.join(self.output_dir, self.name_file('bins', 'tsv'))
        command = f'{hotnet} -elf {ppi} -igf {maps} -gsf {scores} -o {path_bins}'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        # Output paths for the 100 permutations plus the observed data.
        paths_permuted_scores = [os.path.join(self.output_dir, self.name_file(f'scores_{i}', 'tsv'))
                                 for i in range(100)]
        path_h_ppi = os.path.join(self.output_dir, self.name_file('h_ppi', 'tsv'))
        path_h_map = os.path.join(self.output_dir, self.name_file('hotnet_h_map', 'tsv'))
        paths_permuted_h_ppis = [os.path.join(self.output_dir, self.name_file(f'h_ppi_{i}', 'tsv'))
                                 for i in range(100)]
        paths_permuted_h_maps = [os.path.join(self.output_dir, self.name_file(f'h_map_{i}', 'tsv'))
                                 for i in range(100)]
        # Use the pool as a context manager so workers are always cleaned up
        # (the original never closed/joined the pool).
        with mp.Pool(num_cores) as pool:
            # Permute the scores in parallel.
            print('Permute the scores in parallel.')
            args = list(itt.product([scores], [path_bins], paths_permuted_scores))
            pool.starmap(self.permute_scores, args)
            # Construct the hierarchies in parallel.
            print('Construct the hierarchies in parallel.')
            args = [(path_sim_matrix, maps, paths_permuted_scores[i],
                     paths_permuted_h_ppis[i], paths_permuted_h_maps[i])
                    for i in range(100)]
            args.append((path_sim_matrix, maps, scores, path_h_ppi, path_h_map))
            pool.starmap(self.construct_hierarchy, args)
        # Process the hierarchies.
        print('Process the hierarchies.')
        hotnet = f'cd {hotnet_path}; python process_hierarchies.py'
        algo_output = os.path.join(self.output_dir, self.name_file('results', 'tsv'))
        # Bug fix: the original referenced the undefined name
        # 'paths_permuted_h_ggis' here (NameError).
        pelf_str = ' '.join(paths_permuted_h_ppis)
        pigf_str = ' '.join(paths_permuted_h_maps)
        command = (f'{hotnet} -oelf {path_h_ppi} -oigf {path_h_map} -pelf {pelf_str} '
                   f'-pigf {pigf_str} -cf {algo_output} -nc {num_cores}')
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        print(f'HHotNet results saved in {algo_output}')
        return self.extract_output(algo_output)

    def prepare_input(self):
        """Write the PPI edge list, index->gene map and score files.

        Gene indices are remapped to start at 1 relative to the first
        non-isolated node, as expected by the HHotNet scripts.

        :return: [ppi_path, map_path, scores_path]
        :rtype: list(str)
        """
        index2score = self.ppi_network.vertex_properties["cami_score"]
        ppi_path = os.path.join(self.output_dir, self.name_file('ppi', 'tsv'))
        map_path = os.path.join(self.output_dir, self.name_file('map', 'tsv'))
        scores_path = os.path.join(self.output_dir, self.name_file('scores', 'tsv'))
        # Bug fix: the original reused the name 'scores_file' for both the
        # path and the open file handle, so a closed file object (not the
        # path string) was returned to the caller.
        with open(ppi_path, 'w') as ppi_file, open(map_path, 'w') as map_file, \
                open(scores_path, 'w') as scores_file:
            first_non_isolated_node = -1
            last_non_isolated_node = -1
            for vertex in self.ppi_network.vertices():
                if vertex.out_degree() > 0:
                    last_non_isolated_node = int(vertex)
                    if first_non_isolated_node == -1:
                        first_non_isolated_node = int(vertex)
            for node in range(first_non_isolated_node, last_non_isolated_node + 1):
                gene_index = node - first_non_isolated_node + 1
                gene_id = node
                map_file.write(f'{gene_index}\t{gene_id}\n')
                scores_file.write(f'{gene_index}\t{index2score[node]}\n')
            for u, v in self.ppi_network.edges():
                ppi_file.write(f'{int(u) - first_non_isolated_node + 1}\t{int(v) - first_non_isolated_node + 1}\n')
        return [ppi_path, map_path, scores_path]

    def extract_output(self, algo_output):
        """Parse the HHotNet results file.

        :param algo_output: path to the HHotNet results file
        :return: predicted nodes from the first non-comment line (indices!)
        :rtype: list(str)
        """
        nodes = []
        with open(algo_output, 'r') as hotnet_results:
            for line in hotnet_results:
                if not line.startswith('#'):
                    nodes = line.strip().split('\t')
                    break
        # TODO: does HotNet output genes or indices?
        return nodes