diff --git a/cami_src/evaluation_scripts/seed_variation_script.py b/cami_src/evaluation_scripts/seed_variation_script.py index 3a4c4a04065d8c479448f9e9f77d2474aacd3b2b..7724205677ebe31fc7b0860b89a3a00403011fb8 100644 --- a/cami_src/evaluation_scripts/seed_variation_script.py +++ b/cami_src/evaluation_scripts/seed_variation_script.py @@ -6,6 +6,7 @@ import random from cami_suite import cami import utils.comparison_matrix as comparison_matrix import numpy as np +from utils import kolmogorov_smirnoff def predict_and_make_consensus(cami, vis=False): result_sets = cami.make_predictions() @@ -16,7 +17,7 @@ def predict_and_make_consensus(cami, vis=False): if vis: cami.use_nvenn(download=True) -def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=False): +def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=True): identifier = cami.uid base_seeds = cami.origin_seed_lst original_seeds = [cami.ppi_vertex2gene[seed] for seed in base_seeds] @@ -26,6 +27,7 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Fal removal_frac = removal_frac nof_iterations = int(n_iterations) used_tools = list(cami.result_gene_sets.keys()) + prediction_tools = cami.prediction_tools nof_seeds = len(base_seeds) nof_removals = max([int(nof_seeds * removal_frac), 1]) @@ -44,8 +46,12 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Fal for tool in used_tools: redisc_table.write(f'\t{tool}') redisc_table.write('\n') - res_table.write('tool\trdr\trdr_std\tsensitivity\tsensitivity_std\tprecision\tprecision_std\n') - + res_table.write('tool\trdr\trdr_std\tsensitivity\tsensitivity_std\tprecision\tprecision_std') + for tool in prediction_tools: + res_table.write(f'\t{tool}_rdr_ks_pvalue') + for tool in prediction_tools: + res_table.write(f'\t{tool}_msr_ks_pvalue') + res_table.write('\n') # result dictionaries of the form {tool:list(value for each iteration)} tp_rate_dict = {k:list() for k in used_tools} 
@@ -121,7 +127,16 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Fal res_table.write(f'{np.mean(tp_rate_dict[tool])}\t') res_table.write(f'{np.std(tp_rate_dict[tool])}\t') res_table.write(f'{np.mean(module_size_dict[tool])}\t') - res_table.write(f'{np.std(module_size_dict[tool])}\n') + res_table.write(f'{np.std(module_size_dict[tool])}') + for pred_tool in prediction_tools: + p_val = kolmogorov_smirnoff.calculate_ks_p_value(list(redisc_rate_dict[tool]), + list(redisc_rate_dict[pred_tool])) + res_table.write(f'\t{p_val}') + for pred_tool in prediction_tools: + p_val = kolmogorov_smirnoff.calculate_ks_p_value(list(module_size_dict[tool]), + list(module_size_dict[pred_tool])) + res_table.write(f'\t{p_val}') + res_table.write('\n') print(f'Result tables are saved in the following locations:') @@ -175,7 +190,6 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Fal ax1.set_ylabel('Rediscovery rate (<rediscovered seeds>/<removed seeds>)', wrap=True, fontsize=14) - violins2 = ax4.violinplot([tp_rate_dict[tool] for tool in tools], showmeans=True, showextrema=True) for violinpart in list(violins2.keys())[2:]: violins2[violinpart].set_color('k') diff --git a/cami_src/seed_variationconf b/cami_src/seed_variationconf index 8475a43afd1d84c9767a5c54ea081c1ee58f55a5..1f4e7999b89febb3184067858b0a7ca9298ac014 100644 --- a/cami_src/seed_variationconf +++ b/cami_src/seed_variationconf @@ -12,10 +12,10 @@ visualization_flag = False output_name = 'modules.out' para = 1 c = 'false' -toolweight : 1 +toolweight = 1 [diamond] -alpha : 1 +alpha = 1 pred_factor : 3 max_preds : 200 p_value_cutoff : 1e-05