From a7feae0a993401cc4fcc217ab7967ddeffac5771 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Wed, 17 May 2023 19:58:52 +0200
Subject: [PATCH] fixed direction of confidence_level

---
 cami_src/cami.py              |  4 +--
 cami_src/cami_suite.py        | 50 +++++++++++++++++------------------
 cami_src/consensus/cami_v2.py | 25 +++++++++++++++---
 cami_src/consensus/cami_v3.py |  1 +
 cami_src/example_run.py       |  3 ++-
 cami_src/utils/networks.py    |  8 +++---
 6 files changed, 57 insertions(+), 34 deletions(-)

diff --git a/cami_src/cami.py b/cami_src/cami.py
index 06969d3..0ba5922 100755
--- a/cami_src/cami.py
+++ b/cami_src/cami.py
@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
 
     if consensus:
         cami.reset_cami()
-    if evaluate or (not consensus and not evaluate and not seed_variation):
+    if evaluate and (consensus or seed_variation):
         cami.make_evaluation()
 
     # SEED VARIATION
@@ -122,7 +122,7 @@ if __name__ == "__main__":
                        help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1")
     parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami")
     parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.")
-    parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST")
+    parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST", default=False)
     parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None)
     parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID")
     parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files")
diff --git a/cami_src/cami_suite.py b/cami_src/cami_suite.py
index e1ed51a..7b33336 100644
--- a/cami_src/cami_suite.py
+++ b/cami_src/cami_suite.py
@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt
 
 import itertools
 
-
 def generate_param_combinations(params_dict):
     """
     Generates all possible combinations of parameters for the given function(s) and returns them as a list.
@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict):
         result.append([function_name, params_str, {'params': param_dict, 'function': function}])
     return result
 
+
 def initialize_cami(path_to_ppi_file=''):
     cami_params = {}
     # find homepath aka ~/cami
@@ -91,8 +91,9 @@ class cami():
     consensus approach
     """
 
-    def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='', configuration='camiconf',
-                 parallelization=False, ncbi=False, debug=False,save_temps=False, toolweights=None):
+    def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='',
+                 configuration='camiconf',
+                 parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None):
         """Instance variables of CAMI
 
         :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of
@@ -122,7 +123,7 @@ class cami():
         self.prediction_tools = [wrapper.name for wrapper in tool_wrappers]
         self.toolweights = toolweights
         self.home_path = home_path
-        if uid==None:
+        if uid == None:
             uid = str(uuid.uuid4())
         self.uid = str(uid)
         if output_dir == None:
@@ -132,7 +133,7 @@ class cami():
         if self.debug:
             print(f"Output directory of cami: {output_dir}")
         self.output_dir = output_dir
-        
+
         tmp_dir = os.path.join(home_path, 'data', 'tmp', self.uid)
         if not os.path.exists(tmp_dir):
             os.makedirs(tmp_dir)
@@ -190,7 +191,7 @@ class cami():
             tool.set_weight(self.toolweights[tool.code - 1])
         else:
             tool.set_weight()
-        
+
     def initialize_all_tools(self):
         for tool in self.tool_wrappers:
             self.initialize_tool(tool)
@@ -217,7 +218,7 @@ class cami():
         seed_gene_lst = [self.ppi_vertex2gene[seed] for seed in self.seed_lst]
         ppi_graph_file = os.path.join(self.tmp_dir, f'ppi_graph_{self.uid}.graphml')
         self.ppi_graph.save(ppi_graph_file)
-        
+
         biodigest.setup.main(setup_type="api")
         for result_set in self.result_module_sets:
             set_validation_results = biodigest.single_validation.single_validation(
@@ -227,7 +228,7 @@ class cami():
                 distance='jaccard',
                 ref=set(seed_gene_lst),
                 ref_id='entrez')
-            
+
             if set_validation_results['status'] == 'ok':
                 biodigest.single_validation.save_results(set_validation_results, f'{result_set}_{self.uid}',
                                                          self.output_dir)
@@ -238,14 +239,14 @@ class cami():
                     out_dir=self.output_dir,
                     prefix=f'{result_set}_{self.uid}',
                     file_type='png')
-            
+
             with open(os.path.join(self.tmp_dir, f'{result_set}_{self.uid}_relevance_scores.tsv'), 'w') as f:
                 rel_score_name = list(set_validation_results['input_values']['values'].keys())[0]
                 f.write(f'value\t{rel_score_name}\n')
                 val_res_dct = set_validation_results['input_values']['values'][rel_score_name]
                 for val in val_res_dct:
                     f.write(f'{val}\t{val_res_dct[val]}\n')
-            
+
             # sub_validation_results = biodigest.single_validation.single_validation(
             #     tar=set(self.result_module_sets[result_set]),
             #     tar_id='entrez',
@@ -267,7 +268,6 @@ class cami():
             #         out_dir=self.output_dir,
             #         prefix=f'{result_set}_{self.uid}',
             #         file_type='png')
-
 
     def run_threaded_tool(self, tool, pred_sets):
         """run a tool in one thread and save the results into a dictionary pred_sets
@@ -347,7 +347,7 @@ class cami():
         # set of all result genes
         cami_scores = self.ppi_graph.vertex_properties["cami_score"]
         predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
-        consens_threshold = min(self.nof_tools, 2)
+        # consens_threshold = min(self.nof_tools, 2)
         ppi_graph = self.ppi_graph
         seed_list = self.seed_lst
         tool_name_map = self.code2toolname
@@ -356,22 +356,22 @@ class cami():
         # remove seeds from result sets
         for tool in result_sets:
             result_sets[tool] -= set(self.seed_lst)
-        
+
         params_tr = {'hub_penalty': [0.25],
-                     'damping_factor': [0.7],
-                     'confidence_level': [0.5],
+                     'damping_factor': [0.75],
+                     'confidence_level': [0.8],
                      'ranking': ['trustrank'],
-                     'function': {'cami_v3':cami_v3.run_cami}}
-        
+                     'function': {'cami_v3': cami_v3.run_cami}}
+
         cami_setting_list = generate_param_combinations(params_tr)
         camis = {}
 
         for setting in cami_setting_list:
             if setting[1]:
-                func_name = setting[0] + '_' +setting[1].rsplit('_', 1)[-1]
+                func_name = setting[0] + '_' + setting[1].rsplit('_', 1)[-1]
             else:
                 func_name = setting[0]
-            
+
             camis[func_name] = setting[2]
 
         # transform all vertex indices to their corresponding gene names in a result set
@@ -413,7 +413,8 @@ class cami():
             sys.setrecursionlimit(recursion_limit)
             # save the results in outputfiles
             if save_output:
-                self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
+                self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices,
+                                     cami_genes,
                                      gene_name_map, codes2tools, cami_scores)
 
         # add seeds to result sets for drugstone and digest
@@ -423,7 +424,6 @@ class cami():
             print(
                 f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
-
 
     def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                         gene_name_map, codes2tools, cami_scores):
         # save all predictions by all tools
@@ -468,8 +468,8 @@ class cami():
                     outputfile.write(f'{gene}\n')
             if self.debug:
                 print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
-    
-    def visualize_and_save_comparison_matrix(self, additional_id='', 
+
+    def visualize_and_save_comparison_matrix(self, additional_id='',
                                              title='Intersections of result_gene_sets of all analyzed algorithms.'):
         """Create a comparison matrix of the results of all tools. And save it as png file.
         """
@@ -489,7 +489,7 @@ class cami():
         plt.close(comp_fig)
         plt.close(norm_fig)
         return comp_fig_file, norm_fig_file
-    
+
     def use_nvenn(self, download=False):
         """Create Venn Diagrams via a external tool named nvenn by degradome.
         Sends a request via requests to the degradome server.
@@ -510,7 +510,7 @@ class cami():
         else:
             print('Cannot use degradome to create venn diagrams of 6 or more tools')
         return None
-    
+
     def download_diagram(self, url):
         venn_name = f'{self.output_dir}/vdiagram_{self.uid}'
         response = degradome.download_image(url, venn_name + '.png')
diff --git a/cami_src/consensus/cami_v2.py b/cami_src/consensus/cami_v2.py
index 5857bd3..4c27d16 100644
--- a/cami_src/consensus/cami_v2.py
+++ b/cami_src/consensus/cami_v2.py
@@ -4,12 +4,11 @@ import graph_tool as gt
 
 from utils.networks import trustrank, betweenness, must, closeness
 
-
 # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
 # TODO maybe find a smart way to cutoff automatically?
 def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
     hub_penalty = params['hub_penalty']
-    confidence_level = params.get('confidence_level',0.5)
+    confidence_level = params.get('confidence_level', 0.5)
     weighted = 'weighted' in params and params['weighted']
     ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
     trees = params.get('trees', 5)
@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
     weights = subnet.new_edge_property("double")
     for v, c in counts.items():
         weights.a[int(v)] = c
+    # sum = 0
+    # TODO idea for more sophisticated weighting: but then as prior for trustrank and not as weights
+    # for v in subnet.vertices():
+    #     c = counts[v]
+    #     min_dist = None
+    #     for seed in seed_lst:
+    #         dist = gt.topology.shortest_distance(ppi_graph, seed, v)
+    #         if dist == 2147483647:
+    #             dist = 10
+    #         if min_dist is None:
+    #             min_dist = dist
+    #         else:
+    #             min_dist = min(min_dist, dist)
+    #     min_dist /= len(seed_lst)
+    #     score = c + (10-min_dist)
+    #     sum += score
+    #     weights.a[int(v)] = c
+    # for v in subnet.vertices():
+    #     weights.a[int(v)] /= sum
     if ranking_method == 'trustrank':
         damping_factor = params['damping_factor']
         scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
     elif ranking_method == 'harmonic':
         scores = closeness(subnet, hub_penalty, weights)
 
-    putative_scores = scores.a[[int(id) for id in putative_vertices]]
+    putative_scores = list(scores.a[[int(id) for id in putative_vertices]])
     putative_scores.sort()
+    putative_scores.reverse()
     threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
 
     for v in putative_vertices:
diff --git a/cami_src/consensus/cami_v3.py b/cami_src/consensus/cami_v3.py
index 8326847..8e55f10 100644
--- a/cami_src/consensus/cami_v3.py
+++ b/cami_src/consensus/cami_v3.py
@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
             pass
     putative_scores = list(putative_score_map.values())
     putative_scores.sort()
+    putative_scores.reverse()
     threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
     for v in putative_vertices:
         if putative_score_map[v] >= threshold and putative_score_map[v] > 0:
diff --git a/cami_src/example_run.py b/cami_src/example_run.py
index 386b499..ab09eac 100755
--- a/cami_src/example_run.py
+++ b/cami_src/example_run.py
@@ -8,6 +8,7 @@ import subprocess
 chdir((sys.argv[0].rsplit('/', 1))[0])
 networkfile = "../data/input/networks/example_network.tsv"
 seedfile = "../data/input/seeds/example_seeds.txt"
+seedfile = "../data/input/seeds/multiple_sclerosis.tsv"
 identifier = "example_run"
-command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p --f;'
+command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p -f -v;'
 subprocess.call(command, shell=True)
\ No newline at end of file
diff --git a/cami_src/utils/networks.py b/cami_src/utils/networks.py
index e61a812..131fbdd 100644
--- a/cami_src/utils/networks.py
+++ b/cami_src/utils/networks.py
@@ -10,7 +10,7 @@ import graph_tool.util as gtu
 import itertools as it
 
 
-def edge_weights(g, base_weigths, hub_penalty, inverse=False):
+def edge_weights(g, base_weigths, hub_penalty = 0, inverse=False):
     avdeg = gts.vertex_average(g, "total")[0]
     weights = g.new_edge_property("double", val=avdeg)
     if base_weigths is not None:
@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
     return score_prop
 
 
-def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
+def trustrank(g, seed_ids, damping_factor, hub_penalty=0, weights=None):
     if gt.openmp_enabled():
         gt.openmp_set_num_threads(6)
-    weights = edge_weights(g, weights, hub_penalty, inverse=True)
+    weights = edge_weights(g, weights, hub_penalty, inverse=False)
+
     # Call graph-tool to compute TrustRank.
     trust = g.new_vertex_property("double")
     trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids)
+
     scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
     # Compute and return the results.
     return scores
-- 
GitLab