from AlgorithmWrapper import AlgorithmWrapper
import subprocess
import os

#MC:
from configparser import ConfigParser
import ast


class TemplateWrapper(AlgorithmWrapper):
    """Template showing how to wrap a new algorithm/tool for CAMI.

    Copy this class, rename it, and adapt the four methods below. The
    values currently set in ``__init__`` (name, code and config section) are
    placeholders taken from the DIAMOnD tool and must be replaced.
    """

    def __init__(self):
        """Set the tool-specific instance variables.

        The following variables are inherited from the AlgorithmWrapper
        super class and do not need to be redefined for a new algorithm:
            - UID (string)
            - weight (int)
            - output directory (string)
            - ppi_network (Graph())
            - seeds (list())
            - home_path (string)
            - config (string)

        The only instance variables that need to be defined for each tool
        individually are:
            - name (string): The name of the algorithm/tool
            - code (int): A unique integer code for this tool. Choose any
              number that is not taken by other tools yet (currently taken
              are: 0,1,2,3)
            - any constant numbers or variables that are defined in the
              config file for this tool
        """
        super().__init__()
        # NOTE(review): placeholder values -- 'DIAMOnD' / 1 / the 'diamond'
        # config section belong to an existing tool (1 is listed as taken in
        # the docstring above); replace them when deriving a real wrapper.
        self.name = 'DIAMOnD'
        self.code = 1
        config = ConfigParser()
        # self.config is the inherited path to the config file.
        config.read(self.config)
        # getint() reads the option and converts it with int(), exactly like
        # int(config.get(...)).
        self.alpha = config.getint('diamond', 'alpha')
        self.pred_factor = config.getint('diamond', 'pred_factor')
        self.max_preds = config.getint('diamond', 'max_preds')

    def run_algorithm(self, inputparams):
        """Execute the algorithm and return the extracted result.

        Ideally the algorithm is a command line tool that takes a single
        command for execution. Use the inputparams created by
        prepare_input() accordingly and call the command using the
        subprocess library. For example:
            command = f'{<algorithm>} {inputparams[0]} {inputparams[2]} {additional_parameters}'
            subprocess.call(command, shell=True, stdout=subprocess.DEVNULL)

        Do not pass ``stdout=subprocess.PIPE`` to ``subprocess.call``: the
        pipe is never read, so a tool that prints a lot of output can fill
        the pipe buffer and block forever.

        If the algorithm needs multiple steps, you can conduct them here
        too. The only important thing is that this function returns the
        extracted output from the outputfile created by the algorithm.

        Hint: Execute the algorithm and specify the outputfile name. From
        that outputfile extract the predicted Active Module by using
        extract_output.

        Args:
            inputparams (list): A list of parameters for the algorithm that
                is defined via prepare_input()

        Returns:
            list: A list of resulting genes extracted from the generated
                output file
        """
        # example variables:
        tool = 'example tool'
        ppi = inputparams[0]  # path to ppi inputfile
        seeds = inputparams[1]  # path to seed inputfile
        out_filename = self.name_file('out')  # name the outputfile
        algo_output = f'{self.output_dir}/{out_filename}'  # specify the output location

        # Conduct the algorithm.
        # NOTE(review): the example command does not pass algo_output to the
        # tool; a real wrapper must tell the tool where to write its result.
        command = f'{tool} {ppi} {seeds}'
        # The tool's stdout is discarded. DEVNULL is used instead of PIPE
        # because an unread pipe can deadlock the child process.
        subprocess.call(command, shell=True, stdout=subprocess.DEVNULL)
        print(f"{tool} results saved in {algo_output}")
        return self.extract_output(algo_output)

    def prepare_input(self):
        """Write the ppi network and the seed genes as needed by the algorithm.

        Generally, this function will write two new inputfiles as needed by
        the algorithm:
            - a file with the seeds in the needed format
            - a file with the ppi network in the needed format
        If needed create more input files.

        Hint: Save the input files into the temporary folder of the
        algorithm. To access the path to the temporary folder use:
        self.output_dir

        Returns:
            list: a list of the paths to the created input files
                (ppi file first, seed file second)
        """
        inputparams = []

        # name ppi and seed file, specify paths to files
        ppi_filename = self.name_file('ppi')
        ppi_file = f'{self.output_dir}/{ppi_filename}'
        seed_filename = self.name_file('seeds')
        seed_file = f'{self.output_dir}/{seed_filename}'

        # create ppi file: one "source,target" line per edge of the graph
        with open(ppi_file, "w") as file:
            for edge in self.ppi_network.edges():
                file.write(f"{str(edge.source())},{str(edge.target())}\n")
        # add the location of the ppi network file to the list of inputparameters
        inputparams.append(ppi_file)
        print(f'{self.name} ppi network is saved in {ppi_file}')

        # create seed file: one seed per line
        with open(seed_file, "w") as file:
            for seed in self.seeds:
                file.write(f"{seed}\n")
        # add the location of the seed file to the list of inputparameters
        print(f'{self.name} seeds are saved in {seed_file}')
        inputparams.append(seed_file)

        return inputparams

    def extract_output(self, algo_output):
        """Extract the resulting disease module from an outputfile.

        The result is meant to be transformed into a list of vertices in
        the PPI network, which is handed back to the main CAMI suite for
        further processing.
        FYI: CAMI uses the indices in the PPI network to reference the
        input genes i.e. the genes in the list do not correspond to the
        genes in the input files.

        Template stub: no implementation is provided here, so this method
        currently returns None. A real wrapper must parse ``algo_output``
        and return the list described below.

        :param algo_output: path to outputfile
        :type algo_output: str
        :return: list of predicted genes
        :rtype: list(int)
        """