diff --git a/cami/DominoWrapper.py b/cami/DominoWrapper.py index ca94f59a9bed9305349808b52de4817d13ccfe35..2c8c6f70ffca323ae78cd23ddcf385805a40a284 100644 --- a/cami/DominoWrapper.py +++ b/cami/DominoWrapper.py @@ -17,6 +17,8 @@ class DominoWrapper(AlgorithmWrapper): self.visualization_flag = config.get('domino', 'visualization_flag') self.output_name = config.get('domino', 'output_name') + self.parallels = config.get('domino', 'para') + self.c = config.get('domino', 'c') def run_algorithm(self, inputparams): """Run Domino algorithm @@ -39,19 +41,18 @@ class DominoWrapper(AlgorithmWrapper): #MC: #CONFIG: visualization_flag = False - command = f'domino -a {seeds} -n {ppi} -s {slices_file} \ - -o {self.output_dir} -v {self.visualization_flag}' - subprocess.call(command, shell=True, stdout=subprocess.PIPE) - outputname = (os.path.basename(seeds)).rsplit(".")[0] + command = f'domino -a "{seeds}" -n "{ppi}" -s "{slices_file}" \ + -o "{self.output_dir}" -v {self.visualization_flag} -p {self.parallels} --use_cache {self.c}' + run = subprocess.run(command, shell=True, capture_output=True) + match = re.search("( final modules are reported at )(.*)(\n)", run.stdout.decode('utf-8')) + algo_output = match.group(2) #MC: #CONFIG output_name = 'modules.out' - algo_output = os.path.join(self.output_dir, outputname, self.output_name) + assert os.path.exists(algo_output), f'Could not create output file {algo_output} for domino' outputfilename = self.name_file('out', 'out') - command = f'mv {algo_output} {os.path.join(self.output_dir, outputfilename)}' + command = f'mv "{algo_output}" "{os.path.join(self.output_dir, outputfilename)}"' subprocess.call(command, shell=True, stdout=subprocess.PIPE) algo_output = os.path.join(self.output_dir, outputfilename) - - assert os.path.exists(algo_output), f'Could not find output file {algo_output} for domino' print(f"{self.name} results saved in {algo_output}") return self.extract_output(algo_output) @@ -92,7 +93,7 @@ class DominoWrapper(AlgorithmWrapper): if not os.path.exists(slices_output): print('creating domino slices_file...') - command = f'slicer --network_file {ppi_file} --output_file {slices_output}' + command = f'slicer --network_file "{ppi_file}" --output_file "{slices_output}"' subprocess.call(command, shell=True, stdout=subprocess.PIPE) print(f'{self.name} slices are saved in {slices_output}') inputparams.append(slices_output) diff --git a/cami/cami.py b/cami/cami.py index ab789b5d7abc6bf582cb70988cbf4472e7c69839..98a12e66b392b0060a7bda8831ee1d84537cc79a 100755 --- a/cami/cami.py +++ b/cami/cami.py @@ -22,7 +22,7 @@ from configparser import ConfigParser import ast def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, - output_dir, identifier, save_temps, visualize, save_image, force, drugstone, ncbi, configuration, seed_variation): + output_dir, identifier, save_temps, nvenn, save_image, force, drugstone, ncbi, configuration, seed_variation, parallelization): print('CAMI started') config = ConfigParser() config.read(configuration) @@ -58,29 +58,28 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, # change directory to ~/cami/cami (home of cami.py) cami_home = sys.argv[0].rsplit('/', 1) os.chdir(cami_home[0]) - print(cami_home[0]) home_path = os.path.dirname(os.getcwd()) - print(home_path) + print(f"Home directory of cami: {home_path}") if identifier==None: identifier = str(uuid.uuid4()) if output_dir==None: - output_dir = f'{home_path}/data/output/{identifier}' + output_dir = os.path.join(home_path, f'data/output/{identifier}') + output_dir = os.path.abspath(output_dir) + print(f"Output directory of cami: {output_dir}") if not os.path.exists(output_dir): os.makedirs(output_dir) - while output_dir[-1] == '/': - output_dir = output_dir[:-1] - if not os.path.exists(output_dir): os.makedirs(output_dir) #create temporary directory - tmp_dir = f'{home_path}/data/tmp/{identifier}' + tmp_dir = os.path.join(home_path, f'data/tmp/{identifier}') print(f'Creating unique temporary directory for CAMI: {tmp_dir}') if not os.path.exists(tmp_dir): os.makedirs(tmp_dir) + elif not force: print(f'TemporaryDirectory {tmp_dir} already exists.') choice = input('overwrite? [y/n]') @@ -103,10 +102,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, for idx, tool in enumerate(tool_wrappers): tool.set_weight(float(tool_weights[idx])) - original_ppi = ppi_graph.copy() - cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path, configuration, seed_score) + cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path, configuration, seed_score, parallelization) - if ncbi: cami.ncbi = True @@ -119,9 +116,10 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, result_sets = cami.make_predictions() cami.create_consensus(result_sets) - if visualize or save_image: - url = cami.visualize() - if visualize: + if nvenn or save_image: + print('Sending results to nVenn') + url = cami.use_nvenn() + if nvenn: webbrowser.open(url) if save_image: cami.download_diagram(url) @@ -145,8 +143,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, def make_consensus(vis=False): result_sets = cami.make_predictions() cami.create_consensus(result_sets) - if visualize and vis: - url = cami.visualize() + if nvenn and vis: + url = cami.nvenn() cami.download_diagram(url) with open('/Users/Mia/cami_local/cami/data/output/explorativeness.tsv', 'a') as f: make_consensus(vis=True) @@ -159,7 +157,6 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, for vertex in cami.ppi_graph.vertices(): node_degrees.write(f'{cami.ppi_vertex2gene[vertex]}\t{vertex.out_degree()}\t{vertex.in_degree()}\n') - # initialize cami and seed_var base_seeds = cami.origin_seed_lst original_seeds = [cami.ppi_vertex2gene[seed] for seed in base_seeds] @@ -397,7 +394,7 @@ if __name__ == "__main__": parser.add_argument('-o', '--output_dir', action='store', help="path to output directory") parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID") parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files") - parser.add_argument('-v', '--visualize', action='store_true', help="Visualize results using Degradome, an external webtool. Please note that degradome can only be used for visualization with up to 5 tools.") + parser.add_argument('-v', '--nvenn', action='store_true', help="Visualize results using nVenn by Degradome, an external webtool. Please note that degradome can only be used for visualization with up to 5 tools.") parser.add_argument('-img', '--save_image', action='store_true', help="Save the venn diagram from the visualization as png. (Only possible for up to 5 tools)") parser.add_argument('-f', '--force', action='store_true', help="Ignore warnings and overwrite everything when excecuting CAMI.") parser.add_argument('-d', '--drugstone', nargs='*', action='store', default=None, @@ -406,6 +403,8 @@ if __name__ == "__main__": help="Save the NCBI URLs and Summaries of the genes in the CAMI output.") parser.add_argument('-conf', '--configuration', nargs='*', action='store', default='camiconf', help="Choose a configuration for the static variables.") + parser.add_argument('-p', '--parallelization', action='store_true', + help="run the tools for prediction parallelized") #TODO List with additional arguments if needed by certain tools args = vars(parser.parse_args()) diff --git a/cami/cami_suite.py b/cami/cami_suite.py index be36532e7127387889e563f78c3484ed707dc4b8..753fea1f1f41ac877f886331ef1a7544d1c7f042 100644 --- a/cami/cami_suite.py +++ b/cami/cami_suite.py @@ -1,4 +1,4 @@ -import degradome, drugstone, ncbi, random +import degradome, drugstone, ncbi, time, threading def list_combinations(lst, k): """creates all possible combinations of length k with two objects in a list @@ -31,7 +31,7 @@ class cami(): """ A module that is used for Active Module identifaction based on a consensus approach """ - def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, tmp_dir, home_path, config, seed_score): + def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, tmp_dir, home_path, config, seed_score, parallelization): """Instance variables of CAMI :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of @@ -48,7 +48,7 @@ class cami(): :type tmp_dir: str :param home_path: Path to the cami home directory (gitlab repository) :type home_path: str - """ + """ self.ppi_graph = ppi_graph self.origin_ppi_graph = ppi_graph.copy() self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"] @@ -69,6 +69,7 @@ class cami(): self.ncbi = False self.config = config self.seed_score = seed_score + self.threaded = parallelization # set weights for seed genes in ppi_graph for seed in self.seed_lst: self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score @@ -113,6 +114,16 @@ class cami(): def make_evaluation(self): print('Evaluation not implemented yet.') + + def run_threaded_tool(self, tool, pred_sets): + """run a tool in one thread and save the results into a dictionary pred_sets + + Args: + tool (AlgorithmWrapper): Wrapper class for a tool + pred_sets (dict): a dictionary that maps a tool to its result set + """ + preds = self.run_tool(tool) + pred_sets[tool] = preds #- seed_set def make_predictions(self): """create all predictions using the tools specified in tool_wrappers @@ -122,11 +133,21 @@ class cami(): :rtype: dict(AlgorithmWrapper():set(Graph.vertex())) """ print(f'Creating result sets of all {self.nof_tools} tools...') - pred_sets = {} - for tool in self.tool_wrappers: - preds = self.run_tool(tool) - pred_sets[tool] = preds #- seed_set + pred_sets = {tool:None for tool in self.tool_wrappers} + + if self.threaded: + threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,)) + for tool in self.tool_wrappers] + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + else: + for tool in self.tool_wrappers: + pred_sets[tool] = self.run_tool(tool) + assert(list(pred_sets.values()).count(None) < 1) result_sets = {tool:set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]]) for tool in pred_sets} return result_sets @@ -272,7 +293,7 @@ class cami(): # for drugstone self.cami_vertices = cami_vlist - def visualize(self): + def use_nvenn(self): """Create Venn Diagrams via a external tool named degradome. Sends a request via requests to the degradome server. Returns the URL of the result. @@ -282,7 +303,7 @@ class cami(): print('Visualizing results using Degradome...') degradome_sets = {tool:self.result_gene_sets[tool] for tool in self.result_gene_sets - if len(self.result_gene_sets[tool])>0} + if len(self.result_gene_sets[tool])>0} url = degradome.send_request(degradome_sets) with open(f'{self.output_dir}/venn_link_{self.uid}.txt', 'w') as f: f.write(url) @@ -320,6 +341,7 @@ class cami(): print('The link was also saved in the outputfolder for later.') with open(f'{self.output_dir}/drugstone_link_{self.uid}.txt', 'w') as f: f.write(url) + return url def remove_seeds(self, idx_lst): """remove seeds at indices idx diff --git a/cami/camiconf b/cami/camiconf index ddbef63ec9b5a935b7febdb77323f60a12a6519e..b91c2bd53d83f36bab59ee0092545bfe337bb4e7 100644 --- a/cami/camiconf +++ b/cami/camiconf @@ -10,6 +10,8 @@ [domino] visualization_flag = False output_name = 'modules.out' +para = 1 +c = 'false' [diamond] alpha : 1 diff --git a/cami/example_run.py b/cami/example_run.py index af0657ecaa0d162a6bfa97db777f03398490a62b..372f7a6623c272414c74d32e9a02a454ad877b59 100755 --- a/cami/example_run.py +++ b/cami/example_run.py @@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0]) networkfile = "../data/input/networks/example_network.tsv" seedfile = "../data/input/seeds/example_seeds.txt" identifier = "example_run" -command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -ncbi -img -d --f;' +command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -d -img -p --f;' subprocess.call(command, shell=True) \ No newline at end of file