Added a TemplateWrapper for the integration of new Algorithms

81e8ebc8 · Le, Mia · 09ea20dd · 81e8ebc8
Commit 81e8ebc8 authored Apr 17, 2022 by Le, Mia
--- a/cami/TemplateWrapper.py
+++ b/cami/TemplateWrapper.py
+from AlgorithmWrapper import AlgorithmWrapper
+import subprocess, os
+
+#MC:
+from configparser import ConfigParser
+import ast
+
+class TemplateWrapper(AlgorithmWrapper):
+    def __init__(self):
+        """Each tool needs certain predefined instance variables:
+        The following variables are inherited from the AlgorithmWrapper Super Class:
+        - UID (string)
+        - weight (int)
+        - output directory (string)
+        - ppi_network (Graph())
+        - seeds (list())
+        - home_path (string)
+        - config (string)
+        There is no need to redefine these variables when introducing a new algorithm.
+        
+        The only instance variables that need to be defined for each tool individually are:
+        - name (string): The name of the algorithm/tool
+        - code (int): A unique integer code for this tool. Choose any number that is not taken by other tools yet (currently taken are: 0,1,2,3)
+        - any constant numbers or variables that are defined in the config file for this tool
+        """
+        super().__init__()
+        self.name = 'DIAMOnD'
+        self.code = 1
+        config = ConfigParser()
+        config.read(self.config)
+        self.alpha = int(config.get('diamond', 'alpha'))
+        self.pred_factor = int(config.get('diamond', 'pred_factor'))
+        self.max_preds = int(config.get('diamond', 'max_preds'))
+
+    def run_algorithm(self, inputparams):
+        """Execute the algorithm. Ideally the algorithm is a command line tool that takes
+        a single command for execution. Use the inputparams created by prepare_input() 
+        accordingly and call the command using the subprocess library. For example:
+        command = f'{<algorithm>} {inputparams[0]} {inputparams[2]} {additional_parameters}'
+        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
+        
+        If the algorithm needs multiple steps, you can conduct them here too. The only important
+        thing is that this function returns the extracted output from the outputfile created by
+        the algorithm.
+        
+        Hint: Execute the algorithm and specify the outputfile name. From that outputfile 
+        extract the predicted Active Module by using extract_output.
+        
+
+        Args:
+            inputparams (list): A list of parameters for the algorithm that is defined via
+                                prepare_input()
+
+        Returns:
+            list: A list of resulting genes extracted from the generated output file
+        """
+        # example variables:
+        tool = 'example tool'
+        ppi = inputparams[0] # path to ppi inputfile
+        seeds = inputparams[1] # path to seed inputfile
+        out_filename = self.name_file('out') # name the outputfile
+        algo_output = f'{self.output_dir}/{out_filename}' # specify the output location
+        
+        # Conduct the algorithm
+        command = f'{tool} {ppi} {seeds}'
+        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
+        print(f"{tool} results saved in {algo_output}")
+        return self.extract_output(algo_output)
+
+    def prepare_input(self):
+        """prepares the input ppi and seed genes as needed by the algorithm
+        generally, this function will write two new inputfiles as needed by the algorithm:
+        - a file with the seeds in the needed format
+        - a file with the ppi network in the needed format
+        If needed create more input files.
+        Hint: Save the input files into the temporary folder of the algorithm.
+        To access the path to the temporary folder use: self.output_dir
+        
+
+        Returns:
+            list: a list of the paths to the created input files
+        """
+        inputparams = []
+
+        # name ppi and seed file, specify paths to files
+        ppi_filename = self.name_file('ppi')
+        ppi_file = f'{self.output_dir}/{ppi_filename}'
+        seed_filename = self.name_file('seeds')
+        seed_file = f'{self.output_dir}/{seed_filename}'
+
+        # create ppi file
+        with open(ppi_file, "w") as file:
+            # parse through the ppi graph and write the ids of the vertices into a file
+            for edge in self.ppi_network.edges():
+                file.write(f"{str(edge.source())},{str(edge.target())}\n")
+        # add the location of the ppi network file to the list of inputparameters
+        inputparams.append(ppi_file)
+        print(f'{self.name} ppi network is saved in {ppi_file}')
+
+        # create seed file
+        with open(seed_file, "w") as file:
+            # parse through the seed list and write the ids of the vertices into a file
+            for seed in self.seeds:
+                file.write(f"{seed}\n")
+        # add the location of the seed file to the list of inputparameters
+        print(f'{self.name} seeds are saved in {seed_file}')
+        inputparams.append(seed_file)
+
+        return inputparams
+
+    def extract_output(self, algo_output):
+        """extracts a resulting disease module from an outputfile and 
+           transforms them into a list of vertices in the PPI network.
+           This list is handed back to the main CAMI suite for further
+           processing.
+           FYI: CAMI uses the indices in the PPI network to reference 
+           the input genes i.e. the genes in the list do not correspond
+           to the genes in the input files.
+
+        :param algo_output: path to outputfile
+        :type algo_output: str
+        :return: list of predicted genes
+        :rtype: list(int)
+        """