Merge branch 'main' of https://gitlab.rrz.uni-hamburg.de/bay2046/cami

36aa2efc · bay9355 · 8e1bf655 · e105b38d · 36aa2efc · 36aa2efc
Commit 36aa2efc authored 2 years ago by bay9355
--- a/cami_src/cami_suite.py
+++ b/cami_src/cami_suite.py
@@ -35,6 +35,7 @@ def list_combinations(lst, k):
    assert len(set(combs)) == nof_combs
    return (combs)

+
 def initialize_cami(path_to_ppi_file=''):
    cami_params = {}
    # find homepath aka ~/cami
@@ -85,12 +86,13 @@ def initialize_cami(path_to_ppi_file=''):
    seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths}


-
 class cami():
    """ A module that is used for Active Module identifaction based on a
        consensus approach
    """
-    def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', seed_score=10, parallelization=False):
+
+    def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf',
+                 seed_score=10, parallelization=False):
        """Instance variables of CAMI

        :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
@@ -201,13 +203,15 @@ class cami():
                ref=set(self.seed_lst),
                ref_id='entrez')
            if validation_results['status'] == 'ok':
-                biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir)
+                biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}',
+                                                         self.output_dir)
                biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results,
                                                                         mode='set-set',
                                                                         tar=set(self.result_module_sets[result_set]),
                                                                         tar_id='entrez',
                                                                         out_dir=self.output_dir,
                                                                         prefix=f'{result_set}_{self.uid}')
+
    def run_threaded_tool(self, tool, pred_sets):
        """run a tool in one thread and save the results into a dictionary pred_sets

@@ -296,46 +300,45 @@ class cami():
        for tool in result_sets:
            result_sets[tool] -= set(self.seed_lst)

+        params = {'hub_pentalty': [0, 0.25, 0.5, 0.75, 1.0], 'damping_factor': [0.1, 0.25, 0.5, 0.75], 'confidence_level': [0.2, 0.35, 0.5, 0.75], 'ranking':["trustrank", "betweenness", "harmonic"], 'function':[cami_v2.run_cami, cami_v3.run_cami]}
+
        camis = {
            'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
            'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
-                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
+                'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5
            }},
-            'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': {
-                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
+            'cami_v2_param1_b': {'function': cami_v2.run_cami, 'params': {
+                'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5
            }},
-            'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': {
-                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
+            'cami_v2_param1_hc': {'function': cami_v2.run_cami, 'params': {
+                'hub_penalty': 0.3,'ranking': 'harmonic', 'confidence_level': 0.5
            }},
            'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': {
                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
            }},
-            'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': {
-                'hub_penalty': 0, 'damping_factor': 0.7, 'ranking': 'must',
+            'cami_v2_param2_b': {'function': cami_v2.run_cami, 'params': {
+                'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5
            }},
-            'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': {
-                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
+            'cami_v2_param2_hc': {'function': cami_v2.run_cami, 'params': {
+                'hub_penalty': 0,  'ranking': 'harmonic', 'confidence_level': 0.5
            }},
            'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
+                'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5
            }},
-            'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
+            'cami_v3_param1_b': {'function': cami_v3.run_cami, 'params': {
+                'hub_penalty': 0.3,  'ranking': 'betweenness', 'confidence_level': 0.5
            }},
-            'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
+            'cami_v3_param1_hc': {'function': cami_v3.run_cami, 'params': {
+                'hub_penalty': 0.3,  'ranking': 'harmonic', 'confidence_level': 0.5
            }},
            'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'trustrank'
-            }},
-            'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
+                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
            }},
-            'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must'
+            'cami_v3_param2_b': {'function': cami_v3.run_cami, 'params': {
+                'hub_penalty': 0,  'ranking': 'betweenness', 'confidence_level': 0.5
            }},
-            'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': {
-                'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15
+            'cami_v3_param2_hc': {'function': cami_v3.run_cami, 'params': {
+                'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5
            }},
        }

@@ -369,7 +372,8 @@ class cami():
                for vertex in cami_vlist:
                    print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
            else:
-                print(f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module')
+                print(
+                    f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module')

            # for visualization with nvenn
            self.result_gene_sets[cami_method_name] = set(cami_genes)
@@ -381,8 +385,10 @@ class cami():

        # add seeds to result sets for drugstone and digest
        for toolname in self.result_gene_sets:
-            self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(set([gene_name_map[svertex] for svertex in self.seed_lst]))
-            print(f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
+            self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(
+                set([gene_name_map[svertex] for svertex in self.seed_lst]))
+            print(
+                f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')

    def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                        gene_name_map, codes2tools, cami_scores):
@@ -390,13 +396,16 @@ class cami():
        if self.debug:
            print('Saving the results...')
        with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
-            outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+
+            outputfile.write(
+                f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n' +
                f'initially: {self.initial_seed_lst}\n')
            outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n')
            all_vertices = cami_vertices.union(putative_vertices)
            for vertex in all_vertices:
-                outputfile.write(f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
-        if self.debug: print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
+                outputfile.write(
+                    f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
+        if self.debug: print(
+            f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')

        # save the predictions made by cami
        ncbi_url = ('\tncbi_url' if self.ncbi else '')
@@ -414,7 +423,8 @@ class cami():
                        summary = ''
                else:
                    url, summary = '', ''
-                outputfile.write(f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
+                outputfile.write(
+                    f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')

        # # save the whole module
        # whole_module = []
@@ -429,7 +439,6 @@ class cami():
        # print(f'saved {cami_method} output in: {cami_method}_output_{self.uid}.tsv')
        # print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/{cami_method}_module_{self.uid}.txt')

-       
        # save predicted modules by all other tools
        for tool in self.result_module_sets:
            with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile:
@@ -439,7 +448,6 @@ class cami():
            if self.debug:
                print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')

-
    def use_nvenn(self):
        """Create Venn Diagrams via a external tool named degradome.
           Sends a request via requests to the degradome server.
@@ -503,4 +511,3 @@ class cami():
        for seed in removed_seeds:
            self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0
        return removed_seeds
-    
--- a/cami_src/consensus/cami_v2.py
+++ b/cami_src/consensus/cami_v2.py
 import sys
 from collections import defaultdict
 import graph_tool as gt
-from utils.networks import trustrank, betweenness, must
+from utils.networks import trustrank, betweenness, must, closeness



@@ -38,12 +38,15 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
    if ranking_method == 'trustrank':
        scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
    elif ranking_method == 'betweenness':
-        scores = betweenness(subnet, seed_lst, hub_penalty, weights)
+        scores = betweenness(subnet, hub_penalty, weights)
    elif ranking_method == 'must':
        scores = must(subnet, seed_lst, 5, hub_penalty, weights, trees)
+    elif ranking_method == 'harmonic':
+        scores = closeness(subnet, hub_penalty, weights)

    putative_scores = scores.a[[int(id) for id in putative_vertices]]
    putative_scores.sort()
+
    threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
    for v in putative_vertices:
        if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0:

--- a/cami_src/consensus/cami_v3.py
+++ b/cami_src/consensus/cami_v3.py
 import sys
 from collections import defaultdict
-from utils.networks import trustrank, betweenness, must
+from utils.networks import trustrank, betweenness, must, closeness
 import graph_tool as gt

+
 # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
 # TODO maybe find a smart way to cutoff automatically?
 def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
    damping_factor = params['damping_factor']
    hub_penalty = params['hub_penalty']
-    confidence_level = params['confidence_level']
+    confidence_level = params.get('confidence_level', 0.5)
    weighted = 'weighted' in params and params['weighted']
    ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
    trees = params.get('trees', 5)
@@ -40,9 +41,11 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
        if ranking_method == 'trustrank':
            scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
        elif ranking_method == 'betweenness':
-            scores = betweenness(subnet, seed_lst, hub_penalty, weights)
+            scores = betweenness(subnet, hub_penalty, weights)
        elif ranking_method == 'must':
            scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance)
+        elif ranking_method == 'harmonic':
+            scores = closeness(subnet, hub_penalty, weights)
        tool_scores[tool] = scores

    putative_score_map = defaultdict(lambda: 0)

--- a/cami_src/example_run.py
+++ b/cami_src/example_run.py
@@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0])
 networkfile = "../data/input/networks/example_network.tsv"
 seedfile = "../data/input/seeds/example_seeds.txt"
 identifier = "example_run"
-command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -d -p --f;'
+command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -p --f;'
 subprocess.call(command, shell=True)
\ No newline at end of file
--- a/cami_src/utils/networks.py
+++ b/cami_src/utils/networks.py
@@ -9,6 +9,7 @@ import graph_tool.topology as gtt
 import graph_tool.util as gtu
 import itertools as it

+
 def edge_weights(g, base_weigths, hub_penalty, inverse=False):
    avdeg = gts.vertex_average(g, "total")[0]
    weights = g.new_edge_property("double", val=avdeg)
@@ -124,6 +125,7 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):

    return g2

+
 def find_bridges(g):
    r"""Finds all bridges in a graph."""
    global __time
@@ -139,6 +141,7 @@ def find_bridges(g):
            __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge)
    return is_bridge

+
 def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge):
    visited[node] = True
    global __time
@@ -246,19 +249,27 @@ def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
    return scores


-def betweenness(g, seed_ids, hub_penalty, weights=None):
+def betweenness(g, hub_penalty, weights=None):
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    weights = edge_weights(g, weights, hub_penalty, inverse=True)
-    scores = g.new_vertex_property("float")
-    all_pairs = [(source, target) for source in seed_ids for target in seed_ids if source < target]
-    for source, target in all_pairs:
-        local_scores = g.new_vertex_property("float")
-        num_paths = 0.0
-        for path in gtt.all_shortest_paths(g, source, target, weights=weights):
-            local_scores.a[path[1:-1]] += 1
-            num_paths += 1
-        if num_paths > 0:
-            local_scores.a /= num_paths
-        scores.a += local_scores.a
+    # Call graph-tool to compute betweenness centrality.
+    # trust = g.new_vertex_property("double")
+    scores, _ = gtc.betweenness(g, weight=weights)
+    # Compute and return the results.
    return scores
+
+
+def closeness(g, hub_penalty, weights=None):
+    if gt.openmp_enabled():
+        gt.openmp_set_num_threads(6)
+    weights = edge_weights(g, weights, hub_penalty, inverse=True)
+    # Call graph-tool to compute harmonic closeness centrality.
+    # trust = g.new_vertex_property("double")
+    scores = gtc.closeness(g, weight=weights, harmonic=True)
+    # Compute and return the results.
+    return scores
+
+
+
+