threading hinzugefügt

8738ad38 · Le, Mia · 45466c25 · 8738ad38 · 8738ad38 · 8738ad38
Commit 8738ad38 authored Aug 19, 2022 by Le, Mia
--- a/cami/DominoWrapper.py
+++ b/cami/DominoWrapper.py
@@ -17,6 +17,8 @@ class DominoWrapper(AlgorithmWrapper):
        self.visualization_flag = config.get('domino', 'visualization_flag')
        self.output_name = config.get('domino', 'output_name')
+        self.parallels = config.get('domino', 'para')
+        self.c = config.get('domino', 'c')
    def run_algorithm(self, inputparams):
        """Run Domino algorithm
@@ -39,19 +41,18 @@ class DominoWrapper(AlgorithmWrapper):
        #MC:
        #CONFIG: visualization_flag = False
-        command = f'domino -a {seeds} -n {ppi} -s {slices_file} \
+        command = f'domino -a "{seeds}" -n "{ppi}" -s "{slices_file}" \
-            -o {self.output_dir} -v {self.visualization_flag}'
+            -o "{self.output_dir}" -v {self.visualization_flag} -p {self.parallels} --use_cache {self.c}'
-        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
+        run = subprocess.run(command, shell=True, capture_output=True)
-        outputname = (os.path.basename(seeds)).rsplit(".")[0]
+        match = re.search("( final modules are reported at )(.*)(\n)", run.stdout.decode('utf-8'))
+        algo_output = match.group(2)
        #MC:
        #CONFIG output_name = 'modules.out'
-        algo_output = os.path.join(self.output_dir, outputname, self.output_name)
+        assert os.path.exists(algo_output), f'Could not create output file {algo_output} for domino'
        outputfilename = self.name_file('out', 'out')
-        command = f'mv {algo_output} {os.path.join(self.output_dir, outputfilename)}'
+        command = f'mv "{algo_output}" "{os.path.join(self.output_dir, outputfilename)}"'
        subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        algo_output = os.path.join(self.output_dir, outputfilename)
-        assert os.path.exists(algo_output), f'Could not find output file {algo_output} for domino'
        print(f"{self.name} results saved in {algo_output}")
        return self.extract_output(algo_output)
@@ -92,7 +93,7 @@ class DominoWrapper(AlgorithmWrapper):
        if not os.path.exists(slices_output):
            print('creating domino slices_file...')
-            command = f'slicer --network_file {ppi_file} --output_file {slices_output}'
+            command = f'slicer --network_file "{ppi_file}" --output_file "{slices_output}"'
            subprocess.call(command, shell=True, stdout=subprocess.PIPE)
        print(f'{self.name} slices are saved in {slices_output}')
        inputparams.append(slices_output)


--- a/cami/cami.py
+++ b/cami/cami.py
@@ -22,7 +22,7 @@ from configparser import ConfigParser
 import ast
 def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
-         output_dir, identifier, save_temps, visualize, save_image, force, drugstone, ncbi, configuration, seed_variation):
+         output_dir, identifier, save_temps, nvenn, save_image, force, drugstone, ncbi, configuration, seed_variation, parallelization):
    print('CAMI started')
    config = ConfigParser()
    config.read(configuration)
@@ -58,29 +58,28 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
    # change directory to ~/cami/cami (home of cami.py)
    cami_home = sys.argv[0].rsplit('/', 1)
    os.chdir(cami_home[0])
-    print(cami_home[0])
    home_path = os.path.dirname(os.getcwd())
-    print(home_path)
+    print(f"Home directory of cami: {home_path}")
    if identifier==None:
        identifier = str(uuid.uuid4())
    if output_dir==None:
-        output_dir = f'{home_path}/data/output/{identifier}'
+        output_dir = os.path.join(home_path, f'data/output/{identifier}')
+        output_dir = os.path.abspath(output_dir)
+        print(f"Output directory of cami: {output_dir}")
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
-    while output_dir[-1] == '/':
-        output_dir = output_dir[:-1]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    #create temporary directory
-    tmp_dir = f'{home_path}/data/tmp/{identifier}'
+    tmp_dir = os.path.join(home_path, f'data/tmp/{identifier}')
    print(f'Creating unique temporary directory for CAMI: {tmp_dir}')
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    elif not force:
        print(f'TemporaryDirectory {tmp_dir} already exists.')
        choice = input('overwrite? [y/n]')
@@ -103,9 +102,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
    for idx, tool in enumerate(tool_wrappers):
        tool.set_weight(float(tool_weights[idx]))
-    original_ppi = ppi_graph.copy()
+    cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path, configuration, seed_score, parallelization)
-    cami = cami_suite.cami(ppi_graph, seed_lst, tool_wrappers, output_dir, identifier, tmp_dir, home_path, configuration, seed_score)
    if ncbi:
        cami.ncbi = True
@@ -119,9 +116,10 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
        result_sets = cami.make_predictions()
        cami.create_consensus(result_sets)
-        if visualize or save_image:
+        if nvenn or save_image:
-            url = cami.visualize()
+            print('Sending results to nVenn')
-            if visualize:
+            url = cami.use_nvenn()
+            if nvenn:
                webbrowser.open(url)
            if save_image:
                cami.download_diagram(url)
@@ -145,8 +143,8 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
        def make_consensus(vis=False):
+            """Create the tool predictions and the consensus, optionally fetching the nVenn diagram.
+
+            :param vis: when True and the nvenn flag of main is set, request the
+                        diagram URL via cami.use_nvenn() and pass it to
+                        cami.download_diagram()
+            """
            result_sets = cami.make_predictions()
            cami.create_consensus(result_sets)
-            if visualize and vis:
+            if nvenn and vis:
-                url = cami.visualize()
+                # visualize() was renamed to use_nvenn() in cami_suite; use the
+                # same name as the other call site in main
+                url = cami.use_nvenn()
                cami.download_diagram(url)
        with open('/Users/Mia/cami_local/cami/data/output/explorativeness.tsv', 'a') as f:
            make_consensus(vis=True)
@@ -159,7 +157,6 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
            for vertex in cami.ppi_graph.vertices():
                node_degrees.write(f'{cami.ppi_vertex2gene[vertex]}\t{vertex.out_degree()}\t{vertex.in_degree()}\n')   
        # initialize cami and seed_var
        base_seeds = cami.origin_seed_lst
        original_seeds = [cami.ppi_vertex2gene[seed] for seed in base_seeds]
@@ -397,7 +394,7 @@ if __name__ == "__main__":
    parser.add_argument('-o', '--output_dir', action='store', help="path to output directory")
    parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID")
    parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files")
-    parser.add_argument('-v', '--visualize', action='store_true', help="Visualize results using Degradome, an external webtool. Please note that degradome can only be used for visualization with up to 5 tools.")
+    parser.add_argument('-v', '--nvenn', action='store_true', help="Visualize results using nVenn by Degradome, an external webtool. Please note that degradome can only be used for visualization with up to 5 tools.")
    parser.add_argument('-img', '--save_image', action='store_true', help="Save the venn diagram from the visualization as png. (Only possible for up to 5 tools)")
    parser.add_argument('-f', '--force', action='store_true', help="Ignore warnings and overwrite everything when excecuting CAMI.")
    parser.add_argument('-d', '--drugstone', nargs='*', action='store', default=None,
@@ -406,6 +403,8 @@ if __name__ == "__main__":
                        help="Save the NCBI URLs and Summaries of the genes in the CAMI output.")
    parser.add_argument('-conf', '--configuration', nargs='*', action='store', default='camiconf',
                        help="Choose a configuration for the static variables.")
+    parser.add_argument('-p', '--parallelization', action='store_true', 
+            help="run the tools for prediction parallelized")
    #TODO List with additional arguments if needed by certain tools
    args = vars(parser.parse_args())


--- a/cami/cami_suite.py
+++ b/cami/cami_suite.py
-import degradome, drugstone, ncbi, random
+import degradome, drugstone, ncbi, time, threading
 def list_combinations(lst, k):
    """creates all possible combinations of length k with two objects in a list
@@ -31,7 +31,7 @@ class cami():
    """ A module that is used for Active Module identifaction based on a
        consensus approach
    """
-    def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, tmp_dir, home_path, config, seed_score):
+    def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, tmp_dir, home_path, config, seed_score, parallelization):
        """Instance variables of CAMI
        :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
@@ -69,6 +69,7 @@ class cami():
        self.ncbi = False
        self.config = config
        self.seed_score = seed_score
+        self.threaded = parallelization
        # set weights for seed genes in ppi_graph
        for seed in self.seed_lst:
            self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score
@@ -114,6 +115,16 @@ class cami():
    def make_evaluation(self):
        print('Evaluation not implemented yet.')
+    def run_threaded_tool(self, tool, pred_sets):
+        """Run a single tool and store its predictions in the dictionary pred_sets.
+
+        Used as the thread target in make_predictions when self.threaded is set.
+        Nothing is returned; the result of self.run_tool(tool) is written to
+        pred_sets[tool].
+
+        Args:
+            tool (AlgorithmWrapper): Wrapper class for a tool
+            pred_sets (dict): a dictionary that maps a tool to its result set;
+                it is modified in place
+        """
+        preds = self.run_tool(tool)
+        pred_sets[tool] = preds #- seed_set
    def make_predictions(self):
        """create all predictions using the tools specified in tool_wrappers
@@ -122,11 +133,21 @@ class cami():
        :rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
        """
        print(f'Creating result sets of all {self.nof_tools} tools...')
-        pred_sets = {}
+        pred_sets = {tool:None for tool in self.tool_wrappers}
+        if self.threaded:
+            threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,)) 
+                    for tool in self.tool_wrappers]
+            for thread in threads:
+                thread.start()
+            for thread in threads:
+                thread.join()
+        else:
            for tool in self.tool_wrappers:
-            preds = self.run_tool(tool)
+                pred_sets[tool] = self.run_tool(tool)
-            pred_sets[tool] = preds #- seed_set
+        assert(list(pred_sets.values()).count(None) < 1)
        result_sets = {tool:set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]])
                       for tool in pred_sets}
        return result_sets
@@ -272,7 +293,7 @@ class cami():
        # for drugstone
        self.cami_vertices = cami_vlist
-    def visualize(self):
+    def use_nvenn(self):
        """Create Venn Diagrams via a external tool named degradome.
           Sends a request via requests to the degradome server.
           Returns the URL of the result.
@@ -320,6 +341,7 @@ class cami():
        print('The link was also saved in the outputfolder for later.')
        with open(f'{self.output_dir}/drugstone_link_{self.uid}.txt', 'w') as f:
            f.write(url)
+        return url
    def remove_seeds(self, idx_lst):
        """remove seeds at indices idx


--- a/cami/camiconf
+++ b/cami/camiconf
@@ -10,6 +10,8 @@
 [domino]
 visualization_flag = False
 output_name = 'modules.out'
+para = 1
+c = 'false'
 [diamond]
 alpha : 1


--- a/cami/example_run.py
+++ b/cami/example_run.py
@@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0])
 networkfile = "../data/input/networks/example_network.tsv"
 seedfile = "../data/input/seeds/example_seeds.txt"
 identifier = "example_run"
-command = f'./cami.py -n {networkfile} -s {seedfile}  -id {identifier} -ncbi -img -d --f;'
+command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -d -img -p --f;'
 subprocess.call(command, shell=True)
\ No newline at end of file