Skip to content
Snippets Groups Projects
Commit 2518dde2 authored by mlmial's avatar mlmial
Browse files
merge fehmarn with main
parents 63514ba5 a7feae0a
No related branches found
No related tags found
No related merge requests found
# ====================================================
# FileName: cami_37.sh
# Author: Marcos Chow Castro <mctechnology170318@gmail.com>
# GitHub: https://github.com/mctechnology17
# Date: 22.11.2021 20:30
# ====================================================
# Abort early when conda is unavailable: every action in this installer
# (env creation, update, removal) depends on it.
# `command -v` is the POSIX-specified probe; `which` is non-standard and
# its exit status is unreliable on some platforms.
if ! command -v conda > /dev/null 2>&1; then
  # Diagnostics go to stderr so they are not swallowed when stdout is piped.
  echo "$0: conda is not installed. Please first install it" >&2
  exit 1
fi
copyright() {
  # Print the MIT license text verbatim, then exit non-zero so the
  # calling menu terminates after displaying it.
  # The quoted delimiter makes the heredoc literal (no expansion),
  # which is safe here since the text contains no shell metacharacters.
  cat <<'LICENSE_EOF'
Copyright (c) 2021 Marcos Chow Castro
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
LICENSE_EOF
  exit 1
}
usage(){
  # Print invocation help and the numbered workflow steps, then exit
  # non-zero so the calling menu terminates after displaying it.
  # Quoted delimiter: the help text is emitted literally, unexpanded.
  cat <<'USAGE_EOF'
Usage:
sh cami_37.sh
OR
./cami_37.sh
If you can't run the script you have to give it the execution permissions:
Try:
chmod +x cami_37.sh
And finally run:
0) Create cami environment:
sh cami_37.sh
option: y create cami env
1) Initialize cami:
conda activate cami
2) Update cami:
sh cami_37.sh
option: u update env cami
3) Install cami certificate:
sh cami_37.sh
option: ce fix drugstone certificates
4) Execute an example:
sh cami_37.sh
option: ex execute an example
5) Remove tmp file:
sh cami_37.sh
option: cl remove tmp file
USAGE_EOF
  exit 1
}
menu() {
  # Draw the interactive installer menu and the confirmation prompt.
  # A single literal heredoc replaces the run of echo statements; the
  # emitted bytes are identical.
  cat <<'MENU_EOF'
====================================
| CAMI ENVIRONMENT INSTALLER |
|----------------------------------|
| h help/usage |
| y create cami env |
| a cami activate |
| u update env cami |
| ce install cami certificate |
| ex execute an example |
| cl remove tmp file |
| d cami deactivate |
| s show env |
| r remove cami env |
| c copyright/permission |
|----------------------------------|
| Press ENTER or CTRL+C to EXIT |
====================================
Continue?
MENU_EOF
}
install_cami() {
  # Register the conda channels cami needs, create the "cami" env with
  # python 3.7 and graph-tool, and print the remaining manual steps.
  echo "cami: adding channels"
  conda config --add channels defaults
  conda config --add channels conda-forge
  conda config --add channels bioconda
  echo "cami: creating environment variable with python 3.7"
  conda create --name cami -c conda-forge graph-tool python=3.7
  # Post-install checklist, emitted literally.
  cat <<'POST_INSTALL_EOF'
=======================================
| CAMI ENV SUCCESSFULLY CREATED |
|-------------------------------------|
| For a complete installation |
| you must follow the following steps |
| Initialize cami: |
| conda activate cami |
| Update cami: |
| sh cami_37.sh |
| opt: u update env cami |
| Install cami certificate: |
| sh cami_37.sh |
| opt: ce fix drugstone certificate |
|-------------------------------------|
POST_INSTALL_EOF
}
darwin_certificate() {
  # Append the drugstone certificates to the certifi CA bundle of the
  # cami env on macOS. The per-user Anaconda prefix ($HOME/opt/anaconda3)
  # is preferred over the system-wide one (/opt/anaconda3), matching the
  # original elif order. Expects drugstone_certificates.txt in the cwd.
  # Fixes: expansions are now quoted (paths with spaces, e.g. under a
  # user name with a space in $HOME, no longer word-split) and the two
  # duplicated branches are folded into one loop.
  local prefix certifi_dir
  for prefix in "$HOME/opt/anaconda3" /opt/anaconda3; do
    certifi_dir=$prefix/envs/cami/lib/python3.7/site-packages/certifi
    if [ -d "$certifi_dir/" ]; then
      cat drugstone_certificates.txt >> "$certifi_dir/cacert.pem"
      echo "cami: certificate updated successfully"
      return 0
    fi
  done
  echo "cami: the directory certifi in cami_env NOT EXIST"
}
linux_certificate() {
  # Append the drugstone certificates to the certifi CA bundle of the
  # cami env on Linux. The per-user Anaconda prefix ($HOME/anaconda3) is
  # preferred over the root install (/anaconda3), matching the original
  # elif order. Expects drugstone_certificates.txt in the cwd.
  # Fixes: quoted expansions (SC2086) and de-duplicated branch logic.
  local prefix certifi_dir
  for prefix in "$HOME/anaconda3" /anaconda3; do
    certifi_dir=$prefix/envs/cami/lib/python3.7/site-packages/certifi
    if [ -d "$certifi_dir/" ]; then
      cat drugstone_certificates.txt >> "$certifi_dir/cacert.pem"
      echo "cami: certificate updated successfully"
      return 0
    fi
  done
  echo "cami: the directory certifi in cami_env NOT EXIST"
}
install_certificate() {
  # Announce the detected OS and run its certificate installer.
  # Only Darwin and Linux are handled; other kernels fall through silently.
  case "$(uname -s)" in
    Darwin)
      echo "Darwin"
      darwin_certificate
      ;;
    Linux)
      echo "Linux"
      linux_certificate
      ;;
  esac
}
# Non-interactive entry points bypassing the menu:
#   "example" runs the bundled example pipeline,
#   "clean"   removes generated caches and temporary data.
# Any other (or no) first argument falls through to the interactive menu below.
if [ "$1" = "example" ] ; then
./cami/example_run.py
exit 0
elif [ "$1" = "clean" ] ; then
# NOTE(review): rm -r without -f will print errors if the paths are
# already gone — presumably acceptable for a cleanup step; confirm.
rm -r cami/__pycache__
rm -r data/tmp
exit 0
fi
remove_cami() {
  # Ask for confirmation, then delete the cami conda environment.
  # Any answer other than exactly "y" aborts with exit status 1.
  clear
  cat <<'CONFIRM_EOF'
|-------------------------|
| Are you sure you want to|
| delete cami env [y/n]?|
+-------------------------+
| Press CTRL+C to exit |
+-------------------------+
CONFIRM_EOF
  read -r cami_del
  case "$cami_del" in
    y)
      conda env remove -n cami
      ;;
    *)
      echo "No change made!"
      exit 1
      ;;
  esac
}
# Interactive entry point: draw the menu, read a single selection from
# stdin, and dispatch to the matching action. An unrecognized answer
# (including just ENTER) makes no change.
menu
read -r cami_tmp
# NOTE(review): `conda activate`/`conda deactivate` normally require a
# shell initialized via `conda init`; the fallback message on option "a"
# acknowledges that this may fail when run via plain `sh` — confirm.
case $cami_tmp in
y|Y) install_cami ;;
h|H) usage ;;
c|C) copyright ;;
a|A) conda activate cami || echo "cami: If it was not activated successfully, please try manually: conda activate cami" ;;
u|U) conda env update cami --file=cami_37_env.yaml; pip install domino-python; pip install pcst_fast;pip install requests_html; pip install biodigest ;;
d|D) conda deactivate ;;
s|S) conda info --envs ;;
r|R) remove_cami ;;
ce|CE) install_certificate ;;
ex|EX) ./cami/example_run.py ;;
cl|CL) rm -r cami/__pycache__; rm -r data/tmp ;;
*) echo No change made ;;
esac
# vim: set sw=2 ts=2 sts=2 et ft=sh fdm=indent:
name: cami
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- python=3.7
- graph-tool
- networkx
- pandas
- numpy
- jupyter
- pip
- matplotlib
- statsmodels
...@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, ...@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
if consensus: if consensus:
cami.reset_cami() cami.reset_cami()
if evaluate or (not consensus and not evaluate and not seed_variation): if evaluate and (consensus or seed_variation):
cami.make_evaluation() cami.make_evaluation()
# SEED VARIATION # SEED VARIATION
...@@ -122,7 +122,7 @@ if __name__ == "__main__": ...@@ -122,7 +122,7 @@ if __name__ == "__main__":
help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1") help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1")
parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami") parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami")
parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.") parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.")
parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST") parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST", default=False)
parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None) parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None)
parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID") parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID")
parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files") parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files")
......
...@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt ...@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt
import itertools import itertools
def generate_param_combinations(params_dict): def generate_param_combinations(params_dict):
""" """
Generates all possible combinations of parameters for the given function(s) and returns them as a list. Generates all possible combinations of parameters for the given function(s) and returns them as a list.
...@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict): ...@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict):
result.append([function_name, params_str, {'params': param_dict, 'function': function}]) result.append([function_name, params_str, {'params': param_dict, 'function': function}])
return result return result
def initialize_cami(path_to_ppi_file=''): def initialize_cami(path_to_ppi_file=''):
cami_params = {} cami_params = {}
# find homepath aka ~/cami # find homepath aka ~/cami
...@@ -91,7 +91,8 @@ class cami(): ...@@ -91,7 +91,8 @@ class cami():
consensus approach consensus approach
""" """
def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='', configuration='camiconf', def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='',
configuration='camiconf',
parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None): parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None):
"""Instance variables of CAMI """Instance variables of CAMI
...@@ -268,7 +269,6 @@ class cami(): ...@@ -268,7 +269,6 @@ class cami():
# prefix=f'{result_set}_{self.uid}', # prefix=f'{result_set}_{self.uid}',
# file_type='png') # file_type='png')
def run_threaded_tool(self, tool, pred_sets): def run_threaded_tool(self, tool, pred_sets):
"""run a tool in one thread and save the results into a dictionary pred_sets """run a tool in one thread and save the results into a dictionary pred_sets
...@@ -347,7 +347,7 @@ class cami(): ...@@ -347,7 +347,7 @@ class cami():
# set of all result genes # set of all result genes
cami_scores = self.ppi_graph.vertex_properties["cami_score"] cami_scores = self.ppi_graph.vertex_properties["cami_score"]
predicted_by = self.ppi_graph.vertex_properties["predicted_by"] predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
consens_threshold = min(self.nof_tools, 2) # consens_threshold = min(self.nof_tools, 2)
ppi_graph = self.ppi_graph ppi_graph = self.ppi_graph
seed_list = self.seed_lst seed_list = self.seed_lst
tool_name_map = self.code2toolname tool_name_map = self.code2toolname
...@@ -358,8 +358,8 @@ class cami(): ...@@ -358,8 +358,8 @@ class cami():
result_sets[tool] -= set(self.seed_lst) result_sets[tool] -= set(self.seed_lst)
params_tr = {'hub_penalty': [0.25], params_tr = {'hub_penalty': [0.25],
'damping_factor': [0.7], 'damping_factor': [0.75],
'confidence_level': [0.5], 'confidence_level': [0.8],
'ranking': ['trustrank'], 'ranking': ['trustrank'],
'function': {'cami_v3': cami_v3.run_cami}} 'function': {'cami_v3': cami_v3.run_cami}}
...@@ -413,7 +413,8 @@ class cami(): ...@@ -413,7 +413,8 @@ class cami():
sys.setrecursionlimit(recursion_limit) sys.setrecursionlimit(recursion_limit)
# save the results in outputfiles # save the results in outputfiles
if save_output: if save_output:
self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices,
cami_genes,
gene_name_map, codes2tools, cami_scores) gene_name_map, codes2tools, cami_scores)
# add seeds to result sets for drugstone and digest # add seeds to result sets for drugstone and digest
...@@ -423,7 +424,6 @@ class cami(): ...@@ -423,7 +424,6 @@ class cami():
print( print(
f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes') f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, cami_scores): gene_name_map, codes2tools, cami_scores):
# save all predictions by all tools # save all predictions by all tools
......
...@@ -4,7 +4,6 @@ import graph_tool as gt ...@@ -4,7 +4,6 @@ import graph_tool as gt
from utils.networks import trustrank, betweenness, must, closeness from utils.networks import trustrank, betweenness, must, closeness
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically? # TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
...@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
weights = subnet.new_edge_property("double") weights = subnet.new_edge_property("double")
for v, c in counts.items(): for v, c in counts.items():
weights.a[int(v)] = c weights.a[int(v)] = c
# sum = 0
# TODO idea for more sophisticated weighting: but then as prior for trustrank and not as weights
# for v in subnet.vertices():
# c = counts[v]
# min_dist = None
# for seed in seed_lst:
# dist = gt.topology.shortest_distance(ppi_graph, seed, v)
# if dist == 2147483647:
# dist = 10
# if min_dist is None:
# min_dist = dist
# else:
# min_dist = min(min_dist, dist)
# min_dist /= len(seed_lst)
# score = c + (10-min_dist)
# sum += score
# weights.a[int(v)] = c
# for v in subnet.vertices():
# weights.a[int(v)] /= sum
if ranking_method == 'trustrank': if ranking_method == 'trustrank':
damping_factor = params['damping_factor'] damping_factor = params['damping_factor']
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights) scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
...@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
elif ranking_method == 'harmonic': elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights) scores = closeness(subnet, hub_penalty, weights)
putative_scores = scores.a[[int(id) for id in putative_vertices]] putative_scores = list(scores.a[[int(id) for id in putative_vertices]])
putative_scores.sort() putative_scores.sort()
putative_scores.reverse()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices: for v in putative_vertices:
......
...@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
pass pass
putative_scores = list(putative_score_map.values()) putative_scores = list(putative_score_map.values())
putative_scores.sort() putative_scores.sort()
putative_scores.reverse()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices: for v in putative_vertices:
if putative_score_map[v] >= threshold and putative_score_map[v] > 0: if putative_score_map[v] >= threshold and putative_score_map[v] > 0:
......
...@@ -8,6 +8,7 @@ import subprocess ...@@ -8,6 +8,7 @@ import subprocess
chdir((sys.argv[0].rsplit('/', 1))[0]) chdir((sys.argv[0].rsplit('/', 1))[0])
networkfile = "../data/input/networks/example_network.tsv" networkfile = "../data/input/networks/example_network.tsv"
seedfile = "../data/input/seeds/example_seeds.txt" seedfile = "../data/input/seeds/example_seeds.txt"
seedfile = "../data/input/seeds/multiple_sclerosis.tsv"
identifier = "example_run" identifier = "example_run"
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p --f;' command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p -f -v;'
subprocess.call(command, shell=True) subprocess.call(command, shell=True)
\ No newline at end of file
...@@ -10,7 +10,7 @@ import graph_tool.util as gtu ...@@ -10,7 +10,7 @@ import graph_tool.util as gtu
import itertools as it import itertools as it
def edge_weights(g, base_weigths, hub_penalty, inverse=False): def edge_weights(g, base_weigths, hub_penalty = 0, inverse=False):
avdeg = gts.vertex_average(g, "total")[0] avdeg = gts.vertex_average(g, "total")[0]
weights = g.new_edge_property("double", val=avdeg) weights = g.new_edge_property("double", val=avdeg)
if base_weigths is not None: if base_weigths is not None:
...@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10): ...@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
return score_prop return score_prop
def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None): def trustrank(g, seed_ids, damping_factor, hub_penalty=0, weights=None):
if gt.openmp_enabled(): if gt.openmp_enabled():
gt.openmp_set_num_threads(6) gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True) weights = edge_weights(g, weights, hub_penalty, inverse=False)
# Call graph-tool to compute TrustRank. # Call graph-tool to compute TrustRank.
trust = g.new_vertex_property("double") trust = g.new_vertex_property("double")
trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids) trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights) scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
# Compute and return the results. # Compute and return the results.
return scores return scores
......
#ID
entrez.57044
entrez.326
entrez.8691
entrez.3710
entrez.100303715
entrez.8245
entrez.3569
entrez.3559
entrez.3630
entrez.3403
entrez.3402
entrez.3407
entrez.3405
entrez.3406
entrez.3414
entrez.3412
entrez.3410
entrez.100271697
entrez.387082
entrez.55315
entrez.6927
entrez.1493
entrez.1234
#ID
entrez.208
entrez.9479
entrez.8660
entrez.169026
entrez.4813
entrez.4760
entrez.4544
entrez.10644
entrez.3990
entrez.3767
entrez.7466
entrez.3569
entrez.3630
entrez.3667
entrez.3651
entrez.3172
entrez.3159
entrez.2820
entrez.2645
entrez.50982
entrez.11132
entrez.6514
entrez.6833
entrez.6934
entrez.6928
entrez.6927
entrez.5078
entrez.5167
entrez.9882
entrez.56729
entrez.5770
entrez.100188782
entrez.5468
entrez.5506
#ID Name
entrez.7076 TIMP1
entrez.3119 HLA-DQB1
entrez.7040 TGFB1
entrez.712 C1QA
entrez.713 C1QB
entrez.718 C3
entrez.3078 CFHR1
entrez.717 C2
entrez.10878 CFHR3
entrez.64231 MS4A6A
entrez.10855 HPSE
entrez.29015 SLC43A3
entrez.3903 LAIR1
entrez.3075 CFH
entrez.1670 DEFA5
entrez.10253 SPRY2
entrez.8526 DGKE
entrez.10610 ST6GALNAC2
entrez.10184 LHFPL2
entrez.9212 AURKB
entrez.183 AGT
entrez.960 CD44
entrez.7805 LAPTM5
entrez.56992 KIF15
entrez.9055 PRC1
entrez.10461 MERTK
entrez.2305 FOXM1
entrez.7157 TP53
entrez.22909 FAN1
entrez.64170 CARD9
entrez.1636 ACE
entrez.81494 CFHR5
entrez.335 APOA1
entrez.4125 MAN2B1
entrez.10451 VAV3
entrez.2548 GAA
entrez.8741 TNFSF13
entrez.4627 MYH9
entrez.23603 CORO1C
entrez.213 ALB
entrez.101154649 entrez.101154649
entrez.1880 GPR183
entrez.3123 HLA-DRB1
entrez.9332 CD163
entrez.60498 entrez.60498
entrez.11326 VSIG4
entrez.1286 COL4A4
entrez.5328 PLAU
entrez.3117 HLA-DQA1
entrez.1285 COL4A3
entrez.100653384 entrez.100653384
entrez.1287 COL4A5
#ID
entrez.57096
entrez.23568
entrez.145226
entrez.4647
entrez.10083
entrez.64072
entrez.124590
entrez.25861
entrez.2527
entrez.6103
entrez.6121
entrez.65217
entrez.79883
entrez.5015
entrez.5631
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment