From a7feae0a993401cc4fcc217ab7967ddeffac5771 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Wed, 17 May 2023 19:58:52 +0200
Subject: [PATCH] fixed direction of confidence_level

---
 cami_src/cami.py              |  4 +--
 cami_src/cami_suite.py        | 50 +++++++++++++++++------------------
 cami_src/consensus/cami_v2.py | 25 +++++++++++++++---
 cami_src/consensus/cami_v3.py |  1 +
 cami_src/example_run.py       |  3 ++-
 cami_src/utils/networks.py    |  8 +++---
 6 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/cami_src/cami.py b/cami_src/cami.py
index 06969d3..0ba5922 100755
--- a/cami_src/cami.py
+++ b/cami_src/cami.py
@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
 
     if consensus:
         cami.reset_cami()
-    if evaluate or (not consensus and not evaluate and not seed_variation):
+    if evaluate and (consensus or seed_variation):
         cami.make_evaluation()
 
     # SEED VARIATION
@@ -122,7 +122,7 @@ if __name__ == "__main__":
                        help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1")
     parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami")
     parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.")
-    parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST")
+    parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST", default=False)
     parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None)
     parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID")
     parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files")
diff --git a/cami_src/cami_suite.py b/cami_src/cami_suite.py
index e1ed51a..7b33336 100644
--- a/cami_src/cami_suite.py
+++ b/cami_src/cami_suite.py
@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt
 
 import itertools
 
-
 def generate_param_combinations(params_dict):
     """
     Generates all possible combinations of parameters for the given function(s) and returns them as a list.
@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict):
         result.append([function_name, params_str, {'params': param_dict, 'function': function}])
     return result
 
+
 def initialize_cami(path_to_ppi_file=''):
     cami_params = {}
     # find homepath aka ~/cami
@@ -91,8 +91,9 @@ class cami():
     consensus approach
     """
 
-    def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='', configuration='camiconf',
-                 parallelization=False, ncbi=False, debug=False,save_temps=False, toolweights=None):
+    def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='',
+                 configuration='camiconf',
+                 parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None):
         """Instance variables of CAMI
 
         :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
@@ -122,7 +123,7 @@ class cami():
         self.prediction_tools = [wrapper.name for wrapper in tool_wrappers]
         self.toolweights = toolweights
         self.home_path = home_path
-        if uid==None:
+        if uid == None:
             uid = str(uuid.uuid4())
         self.uid = str(uid)
         if output_dir == None:
@@ -132,7 +133,7 @@ class cami():
         if self.debug:
             print(f"Output directory of cami: {output_dir}")
         self.output_dir = output_dir
-        
+
         tmp_dir = os.path.join(home_path, 'data', 'tmp', self.uid)
         if not os.path.exists(tmp_dir):
             os.makedirs(tmp_dir)
@@ -190,7 +191,7 @@ class cami():
             tool.set_weight(self.toolweights[tool.code - 1])
         else:
             tool.set_weight()
-        
+
     def initialize_all_tools(self):
         for tool in self.tool_wrappers:
             self.initialize_tool(tool)
@@ -217,7 +218,7 @@ class cami():
         seed_gene_lst = [self.ppi_vertex2gene[seed] for seed in self.seed_lst]
         ppi_graph_file = os.path.join(self.tmp_dir, f'ppi_graph_{self.uid}.graphml')
         self.ppi_graph.save(ppi_graph_file)
-        
+
         biodigest.setup.main(setup_type="api")
         for result_set in self.result_module_sets:
             set_validation_results = biodigest.single_validation.single_validation(
@@ -227,7 +228,7 @@ class cami():
                 distance='jaccard',
                 ref=set(seed_gene_lst),
                 ref_id='entrez')
-            
+
             if set_validation_results['status'] == 'ok':
                 biodigest.single_validation.save_results(set_validation_results, f'{result_set}_{self.uid}',
                                                          self.output_dir)
@@ -238,14 +239,14 @@ class cami():
                     out_dir=self.output_dir,
                     prefix=f'{result_set}_{self.uid}',
                     file_type='png')
-            
+
             with open(os.path.join(self.tmp_dir, f'{result_set}_{self.uid}_relevance_scores.tsv'), 'w') as f:
                 rel_score_name = list(set_validation_results['input_values']['values'].keys())[0]
                 f.write(f'value\t{rel_score_name}\n')
                 val_res_dct = set_validation_results['input_values']['values'][rel_score_name]
                 for val in val_res_dct:
                     f.write(f'{val}\t{val_res_dct[val]}\n')
-            
+
             # sub_validation_results = biodigest.single_validation.single_validation(
             #     tar=set(self.result_module_sets[result_set]),
             #     tar_id='entrez',
@@ -267,7 +268,6 @@ class cami():
             #         out_dir=self.output_dir,
             #         prefix=f'{result_set}_{self.uid}',
             #         file_type='png')
-
 
     def run_threaded_tool(self, tool, pred_sets):
         """run a tool in one thread and save the results into a dictionary pred_sets
@@ -347,7 +347,7 @@ class cami():
         # set of all result genes
         cami_scores = self.ppi_graph.vertex_properties["cami_score"]
         predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
-        consens_threshold = min(self.nof_tools, 2)
+        # consens_threshold = min(self.nof_tools, 2)
         ppi_graph = self.ppi_graph
         seed_list = self.seed_lst
         tool_name_map = self.code2toolname
@@ -356,22 +356,22 @@ class cami():
         # remove seeds from result sets
         for tool in result_sets:
             result_sets[tool] -= set(self.seed_lst)
-        
+
         params_tr = {'hub_penalty': [0.25],
-                     'damping_factor': [0.7],
-                     'confidence_level': [0.5],
+                     'damping_factor': [0.75],
+                     'confidence_level': [0.8],
                      'ranking': ['trustrank'],
-                     'function': {'cami_v3':cami_v3.run_cami}}
-        
+                     'function': {'cami_v3': cami_v3.run_cami}}
+
         cami_setting_list = generate_param_combinations(params_tr)
         camis = {}
 
         for setting in cami_setting_list:
             if setting[1]:
-                func_name = setting[0] + '_' +setting[1].rsplit('_', 1)[-1]
+                func_name = setting[0] + '_' + setting[1].rsplit('_', 1)[-1]
             else:
                 func_name = setting[0]
-            
+
             camis[func_name] = setting[2]
 
         # transform all vertex indices to their corresponding gene names in a result set
@@ -413,7 +413,8 @@ class cami():
             sys.setrecursionlimit(recursion_limit)
             # save the results in outputfiles
             if save_output:
-                self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
+                self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices,
+                                     cami_genes,
                                      gene_name_map, codes2tools, cami_scores)
 
         # add seeds to result sets for drugstone and digest
@@ -423,7 +424,6 @@ class cami():
             print(
                 f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
-
 
     def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                         gene_name_map, codes2tools, cami_scores):
         # save all predictions by all tools
@@ -468,8 +468,8 @@ class cami():
                     outputfile.write(f'{gene}\n')
             if self.debug:
                 print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
-    
-    def visualize_and_save_comparison_matrix(self, additional_id='', 
+
+    def visualize_and_save_comparison_matrix(self, additional_id='',
                                              title='Intersections of result_gene_sets of all analyzed algorithms.'):
         """Create a comparison matrix of the results of all tools. And save it as png file.
         """
@@ -489,7 +489,7 @@ class cami():
         plt.close(comp_fig)
         plt.close(norm_fig)
         return comp_fig_file, norm_fig_file
-    
+
     def use_nvenn(self, download=False):
         """Create Venn Diagrams via a external tool named nvenn by degradome.
         Sends a request via requests to the degradome server.
@@ -510,7 +510,7 @@ class cami():
         else:
             print('Cannot use degradome to create venn diagrams of 6 or more tools')
         return None
-    
+
     def download_diagram(self, url):
         venn_name = f'{self.output_dir}/vdiagram_{self.uid}'
         response = degradome.download_image(url, venn_name + '.png')
diff --git a/cami_src/consensus/cami_v2.py b/cami_src/consensus/cami_v2.py
index 5857bd3..4c27d16 100644
--- a/cami_src/consensus/cami_v2.py
+++ b/cami_src/consensus/cami_v2.py
@@ -4,12 +4,11 @@ import graph_tool as gt
 
 from utils.networks import trustrank, betweenness, must, closeness
 
-
 # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
 # TODO maybe find a smart way to cutoff automatically?
 def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
     hub_penalty = params['hub_penalty']
-    confidence_level = params.get('confidence_level',0.5)
+    confidence_level = params.get('confidence_level', 0.5)
     weighted = 'weighted' in params and params['weighted']
     ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
     trees = params.get('trees', 5)
@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
     weights = subnet.new_edge_property("double")
     for v, c in counts.items():
         weights.a[int(v)] = c
+    # sum = 0
+    # TODO idea for more sophisticated weighting: but then as prior for trustrank and not as weights
+    # for v in subnet.vertices():
+    #     c = counts[v]
+    #     min_dist = None
+    #     for seed in seed_lst:
+    #         dist = gt.topology.shortest_distance(ppi_graph, seed, v)
+    #         if dist == 2147483647:
+    #             dist = 10
+    #         if min_dist is None:
+    #             min_dist = dist
+    #         else:
+    #             min_dist = min(min_dist, dist)
+    #     min_dist /= len(seed_lst)
+    #     score = c + (10-min_dist)
+    #     sum += score
+    #     weights.a[int(v)] = c
+    # for v in subnet.vertices():
+    #     weights.a[int(v)] /= sum
     if ranking_method == 'trustrank':
         damping_factor = params['damping_factor']
         scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
     elif ranking_method == 'harmonic':
         scores = closeness(subnet, hub_penalty, weights)
 
-    putative_scores = scores.a[[int(id) for id in putative_vertices]]
+    putative_scores = list(scores.a[[int(id) for id in putative_vertices]])
     putative_scores.sort()
+    putative_scores.reverse()
     threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
 
     for v in putative_vertices:
diff --git a/cami_src/consensus/cami_v3.py b/cami_src/consensus/cami_v3.py
index 8326847..8e55f10 100644
--- a/cami_src/consensus/cami_v3.py
+++ b/cami_src/consensus/cami_v3.py
@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
             pass
     putative_scores = list(putative_score_map.values())
     putative_scores.sort()
+    putative_scores.reverse()
     threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
     for v in putative_vertices:
         if putative_score_map[v] >= threshold and putative_score_map[v] > 0:
diff --git a/cami_src/example_run.py b/cami_src/example_run.py
index 386b499..ab09eac 100755
--- a/cami_src/example_run.py
+++ b/cami_src/example_run.py
@@ -8,6 +8,7 @@ import subprocess
 chdir((sys.argv[0].rsplit('/', 1))[0])
 networkfile = "../data/input/networks/example_network.tsv"
 seedfile = "../data/input/seeds/example_seeds.txt"
+seedfile = "../data/input/seeds/multiple_sclerosis.tsv"
 identifier = "example_run"
-command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p --f;'
+command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p -f -v;'
 subprocess.call(command, shell=True)
\ No newline at end of file
diff --git a/cami_src/utils/networks.py b/cami_src/utils/networks.py
index e61a812..131fbdd 100644
--- a/cami_src/utils/networks.py
+++ b/cami_src/utils/networks.py
@@ -10,7 +10,7 @@ import graph_tool.util as gtu
 import itertools as it
 
 
-def edge_weights(g, base_weigths, hub_penalty, inverse=False):
+def edge_weights(g, base_weigths, hub_penalty = 0, inverse=False):
     avdeg = gts.vertex_average(g, "total")[0]
     weights = g.new_edge_property("double", val=avdeg)
     if base_weigths is not None:
@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
     return score_prop
 
 
-def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
+def trustrank(g, seed_ids, damping_factor, hub_penalty=0, weights=None):
     if gt.openmp_enabled():
         gt.openmp_set_num_threads(6)
-    weights = edge_weights(g, weights, hub_penalty, inverse=True)
+    weights = edge_weights(g, weights, hub_penalty, inverse=False)
+
     # Call graph-tool to compute TrustRank.
     trust = g.new_vertex_property("double")
     trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids)
+
     scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
     # Compute and return the results.
     return scores
-- 
GitLab