From 0ca1b12c92be3e8f11aa97b5e299484428aa4e7f Mon Sep 17 00:00:00 2001 From: mlmial <ml.miale@icloud.com> Date: Mon, 7 Aug 2023 00:06:00 +0200 Subject: [PATCH] edited seed variation plots --- .../seed_variation_script.py | 75 +++++++++---------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/cami_src/evaluation_scripts/seed_variation_script.py b/cami_src/evaluation_scripts/seed_variation_script.py index 3f24ee9..0ae2af9 100644 --- a/cami_src/evaluation_scripts/seed_variation_script.py +++ b/cami_src/evaluation_scripts/seed_variation_script.py @@ -52,15 +52,13 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru for tool in prediction_tools: res_table.write(f'\t{tool}_msr_ks_pvalue') - # if relevance scores file exsits: - if os.path.exists(os.path.join(cami.tmp_dir, f'{used_tools[0]}_{identifier}_relevance_scores.tsv)')): - with open(os.path.join(cami.tmp_dir, f'{used_tools[0]}_{identifier}_relevance_scores.tsv)'), 'r') as f: - for line in f: - val_name = line.split('\t')[0] - redisc_table.write(f'\t{val_name}') + # with open(os.path.join(cami.tmp_dir, f'{used_tools[0]}_{cami.uid}_relevance_scores.tsv'), 'r') as f: + # for line in f: + # val_name = line.split('\t')[0] + # res_table.write(f'\t{val_name}') res_table.write('\n') - # result dictionaries of the form {tool:list(value for each iteration)} + #result dictionaries of the form {tool:list(value for each iteration)} tp_rate_dict = {k:list() for k in used_tools} redisc_rate_dict = {k:list() for k in used_tools} module_size_dict = {k:list() for k in used_tools} @@ -89,11 +87,7 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru # reinitialize tools cami.initialize_all_tools() - # repeat consensus - if ident%20==0: - predict_and_make_consensus(cami) - else: - predict_and_make_consensus(cami) + predict_and_make_consensus(cami) used_seeds = [cami.ppi_vertex2gene[seed] for seed in cami.seed_lst] @@ -143,11 +137,11 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru p_val = kolmogorov_smirnoff.calculate_ks_p_value(list(module_size_dict[tool]), list(module_size_dict[pred_tool])) res_table.write(f'\t{p_val}') - if os.path.exists(os.path.join(cami.tmp_dir, f'{used_tools[0]}_{identifier}_relevance_scores.tsv)')): - with open(os.path.join(cami.tmp_dir, f'{tool}_{identifier}_relevance_scores.tsv'), 'r') as f: - for line in f: - rel_score = line.split('\t')[1].strip() - res_table.write(f'\t{rel_score}') + + #with open(os.path.join(cami.tmp_dir, f'{tool}_{identifier}_relevance_scores.tsv'), 'r') as f: + # for line in f: + # rel_score = line.split('\t')[1].strip() + # res_table.write(f'\t{rel_score}') res_table.write('\n') print(f'Result tables are saved in the following locations:') @@ -168,12 +162,13 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru if second_occurrence_index > -1: # replace the character at that index with the replacement character tool_name = tool[:second_occurrence_index] + '\n' + tool[second_occurrence_index + 1:] + tool_name = tool_name.replace('_hub_penalty', '').replace('damping_factor', 'df').replace('confidence_level', 'cl').replace('ranking_trustrank', 'tr') tool_labels[idx] = tool_name if plot: #PLOT # Create a figure instance #print(sys.getrecursionlimit()) - fig1, (ax1, ax5, ax4) = plt.subplots(3, 1, figsize=(20,20)) + fig1, (ax1, ax5) = plt.subplots(2, 1, figsize=(12,12)) fig1.subplots_adjust(left=0.2) # Extract Figure and Axes instance @@ -186,7 +181,7 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru for violin, tool in zip(violins1['bodies'], tools): if tool in [tw.name for tw in cami.tool_wrappers]: violin.set_facecolor('saddlebrown') - elif tool == 'first_neighbors': + elif tool == 'first_neighbours': violin.set_facecolor('orange') elif tool in ['union', 'intersection']: violin.set_facecolor('peachpuff') @@ -194,7 +189,7 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru violin.set_facecolor('red') # Add title - ax1.set_title(f'Rediscovery rate after randomly removing {nof_removals} seeds {nof_iterations} times from {identifier} seeds.', wrap=True, fontsize=14) + ax1.set_title(f'Rediscovery rate after randomly removing {nof_removals} seeds {nof_iterations} times\nfrom {identifier} seeds.', wrap=True, fontsize=14) ax1.set_xticks(list(range(1,len(tools)+1))) ax1.set_xticklabels(tool_labels) @@ -202,26 +197,26 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru ax1.set_ylabel('Rediscovery rate (<rediscovered seeds>/<removed seeds>)', wrap=True, fontsize=14) - violins2 = ax4.violinplot([tp_rate_dict[tool] for tool in tools], showmeans=True, showextrema=True) - for violinpart in list(violins2.keys())[2:]: - violins2[violinpart].set_color('k') - for violin, tool in zip(violins2['bodies'], tools): - if tool in [tw.name for tw in cami.tool_wrappers]: - violin.set_facecolor('tan') - elif tool == 'first_neighbors': - violin.set_facecolor('peachpuff') - elif tool in ['union', 'intersection']: - violin.set_facecolor('orange') - else: - violin.set_facecolor('darkorange') - # Add title - ax4.set_title(f'True positive rates after randomly removing {nof_removals} seeds {nof_iterations} times from {identifier} seeds.', wrap=True, fontsize=14) + # violins2 = ax4.violinplot([tp_rate_dict[tool] for tool in tools], showmeans=True, showextrema=True) + # for violinpart in list(violins2.keys())[2:]: + # violins2[violinpart].set_color('k') + # for violin, tool in zip(violins2['bodies'], tools): + # if tool in [tw.name for tw in cami.tool_wrappers]: + # violin.set_facecolor('tan') + # elif tool == 'first_neighbors': + # violin.set_facecolor('peachpuff') + # elif tool in ['union', 'intersection']: + # violin.set_facecolor('orange') + # else: + # violin.set_facecolor('darkorange') + # # Add title + # ax4.set_title(f'True positive rates after randomly removing {nof_removals} seeds {nof_iterations} times from {identifier} seeds.', wrap=True, fontsize=14) - ax4.set_xticks(list(range(1,len(tools)+1))) - ax4.set_xticklabels(tool_labels) - ax4.tick_params(axis='x', labelsize=11) + # ax4.set_xticks(list(range(1,len(tools)+1))) + # ax4.set_xticklabels(tool_labels) + # ax4.tick_params(axis='x', labelsize=11) - ax4.set_ylabel('Sensitivity (TP/TP + FN)', wrap=True, fontsize=14) + # ax4.set_ylabel('Sensitivity (TP/TP + FN)', wrap=True, fontsize=14) violins3 = ax5.violinplot([module_size_dict[tool] for tool in tools], showmeans=True, showextrema=True) # Add title @@ -231,14 +226,14 @@ def make_seedvariation(cami, n_iterations, removal_frac=0.2, vis=False, plot=Tru for violin, tool in zip(violins3['bodies'], tools): if tool in [tw.name for tw in cami.tool_wrappers]: violin.set_facecolor('midnightblue') - elif tool == 'first_neighbors': + elif tool == 'first_neighbours': violin.set_facecolor('mediumblue') elif tool in ['union', 'intersection']: violin.set_facecolor('lightsteelblue') else: violin.set_facecolor('royalblue') - ax5.set_title(f'Ratio of number of rediscovered seeds and predicted module size after removing {nof_removals} seeds {nof_iterations} times from {identifier} seeds.', wrap=True, fontsize=14) + ax5.set_title(f'Rediscovery module size ratio after removing {nof_removals} seeds {nof_iterations} times\nfrom {identifier} seeds.', wrap=True, fontsize=14) ax5.set_xticks(list(range(1,len(tools)+1))) ax5.set_xticklabels(tool_labels) -- GitLab