Skip to content
Snippets Groups Projects
Commit 2518dde2 authored by mlmial's avatar mlmial
Browse files
merge fehmarn with main
parents 63514ba5 a7feae0a
No related branches found
No related tags found
No related merge requests found
# ====================================================
# FileName: cami_37.sh
# Author: Marcos Chow Castro <mctechnology170318@gmail.com>
# GitHub: https://github.com/mctechnology17
# Date: 22.11.2021 20:30
# ====================================================
# Abort early when conda is unavailable: every action in this installer
# (env creation, update, removal) depends on it.
# `command -v` is the POSIX-specified probe; `which` is non-standard and
# its exit status is unreliable on some platforms.
if ! command -v conda > /dev/null 2>&1; then
  # Diagnostics go to stderr so they are not swallowed when stdout is piped.
  echo "$0: conda is not installed. Please first install it" >&2
  exit 1
fi
copyright() {
  # Print the MIT license text verbatim, then exit non-zero so the
  # calling menu terminates after displaying it.
  # The quoted delimiter makes the heredoc literal (no expansion),
  # which is safe here since the text contains no shell metacharacters.
  cat <<'LICENSE_EOF'
Copyright (c) 2021 Marcos Chow Castro
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
LICENSE_EOF
  exit 1
}
usage(){
  # Print invocation help and the numbered workflow steps, then exit
  # non-zero so the calling menu terminates after displaying it.
  # Quoted delimiter: the help text is emitted literally, unexpanded.
  cat <<'USAGE_EOF'
Usage:
sh cami_37.sh
OR
./cami_37.sh
If you can't run the script you have to give it the execution permissions:
Try:
chmod +x cami_37.sh
And finally run:
0) Create cami environment:
sh cami_37.sh
option: y create cami env
1) Initialize cami:
conda activate cami
2) Update cami:
sh cami_37.sh
option: u update env cami
3) Install cami certificate:
sh cami_37.sh
option: ce fix drugstone certificates
4) Execute an example:
sh cami_37.sh
option: ex execute an example
5) Remove tmp file:
sh cami_37.sh
option: cl remove tmp file
USAGE_EOF
  exit 1
}
menu() {
  # Draw the interactive installer menu and the confirmation prompt.
  # A single literal heredoc replaces the run of echo statements; the
  # emitted bytes are identical.
  cat <<'MENU_EOF'
====================================
| CAMI ENVIRONMENT INSTALLER |
|----------------------------------|
| h help/usage |
| y create cami env |
| a cami activate |
| u update env cami |
| ce install cami certificate |
| ex execute an example |
| cl remove tmp file |
| d cami deactivate |
| s show env |
| r remove cami env |
| c copyright/permission |
|----------------------------------|
| Press ENTER or CTRL+C to EXIT |
====================================
Continue?
MENU_EOF
}
install_cami() {
  # Register the conda channels cami needs, create the "cami" env with
  # python 3.7 and graph-tool, and print the remaining manual steps.
  echo "cami: adding channels"
  conda config --add channels defaults
  conda config --add channels conda-forge
  conda config --add channels bioconda
  echo "cami: creating environment variable with python 3.7"
  conda create --name cami -c conda-forge graph-tool python=3.7
  # Post-install checklist, emitted literally.
  cat <<'POST_INSTALL_EOF'
=======================================
| CAMI ENV SUCCESSFULLY CREATED |
|-------------------------------------|
| For a complete installation |
| you must follow the following steps |
| Initialize cami: |
| conda activate cami |
| Update cami: |
| sh cami_37.sh |
| opt: u update env cami |
| Install cami certificate: |
| sh cami_37.sh |
| opt: ce fix drugstone certificate |
|-------------------------------------|
POST_INSTALL_EOF
}
darwin_certificate() {
  # Append the drugstone certificates to the certifi CA bundle of the
  # cami env on macOS. The per-user Anaconda prefix ($HOME/opt/anaconda3)
  # is preferred over the system-wide one (/opt/anaconda3), matching the
  # original elif order. Expects drugstone_certificates.txt in the cwd.
  # Fixes: expansions are now quoted (paths with spaces, e.g. under a
  # user name with a space in $HOME, no longer word-split) and the two
  # duplicated branches are folded into one loop.
  local prefix certifi_dir
  for prefix in "$HOME/opt/anaconda3" /opt/anaconda3; do
    certifi_dir=$prefix/envs/cami/lib/python3.7/site-packages/certifi
    if [ -d "$certifi_dir/" ]; then
      cat drugstone_certificates.txt >> "$certifi_dir/cacert.pem"
      echo "cami: certificate updated successfully"
      return 0
    fi
  done
  echo "cami: the directory certifi in cami_env NOT EXIST"
}
linux_certificate() {
  # Append the drugstone certificates to the certifi CA bundle of the
  # cami env on Linux. The per-user Anaconda prefix ($HOME/anaconda3) is
  # preferred over the root install (/anaconda3), matching the original
  # elif order. Expects drugstone_certificates.txt in the cwd.
  # Fixes: quoted expansions (SC2086) and de-duplicated branch logic.
  local prefix certifi_dir
  for prefix in "$HOME/anaconda3" /anaconda3; do
    certifi_dir=$prefix/envs/cami/lib/python3.7/site-packages/certifi
    if [ -d "$certifi_dir/" ]; then
      cat drugstone_certificates.txt >> "$certifi_dir/cacert.pem"
      echo "cami: certificate updated successfully"
      return 0
    fi
  done
  echo "cami: the directory certifi in cami_env NOT EXIST"
}
install_certificate() {
  # Announce the detected OS and run its certificate installer.
  # Only Darwin and Linux are handled; other kernels fall through silently.
  case "$(uname -s)" in
    Darwin)
      echo "Darwin"
      darwin_certificate
      ;;
    Linux)
      echo "Linux"
      linux_certificate
      ;;
  esac
}
# Non-interactive entry points bypassing the menu:
#   "example" runs the bundled example pipeline,
#   "clean"   removes generated caches and temporary data.
# Any other (or no) first argument falls through to the interactive menu below.
if [ "$1" = "example" ] ; then
./cami/example_run.py
exit 0
elif [ "$1" = "clean" ] ; then
# NOTE(review): rm -r without -f will print errors if the paths are
# already gone — presumably acceptable for a cleanup step; confirm.
rm -r cami/__pycache__
rm -r data/tmp
exit 0
fi
remove_cami() {
  # Ask for confirmation, then delete the cami conda environment.
  # Any answer other than exactly "y" aborts with exit status 1.
  clear
  cat <<'CONFIRM_EOF'
|-------------------------|
| Are you sure you want to|
| delete cami env [y/n]?|
+-------------------------+
| Press CTRL+C to exit |
+-------------------------+
CONFIRM_EOF
  read -r cami_del
  case "$cami_del" in
    y)
      conda env remove -n cami
      ;;
    *)
      echo "No change made!"
      exit 1
      ;;
  esac
}
# Interactive entry point: draw the menu, read a single selection from
# stdin, and dispatch to the matching action. An unrecognized answer
# (including just ENTER) makes no change.
menu
read -r cami_tmp
# NOTE(review): `conda activate`/`conda deactivate` normally require a
# shell initialized via `conda init`; the fallback message on option "a"
# acknowledges that this may fail when run via plain `sh` — confirm.
case $cami_tmp in
y|Y) install_cami ;;
h|H) usage ;;
c|C) copyright ;;
a|A) conda activate cami || echo "cami: If it was not activated successfully, please try manually: conda activate cami" ;;
u|U) conda env update cami --file=cami_37_env.yaml; pip install domino-python; pip install pcst_fast;pip install requests_html; pip install biodigest ;;
d|D) conda deactivate ;;
s|S) conda info --envs ;;
r|R) remove_cami ;;
ce|CE) install_certificate ;;
ex|EX) ./cami/example_run.py ;;
cl|CL) rm -r cami/__pycache__; rm -r data/tmp ;;
*) echo No change made ;;
esac
# vim: set sw=2 ts=2 sts=2 et ft=sh fdm=indent:
name: cami
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- python=3.7
- graph-tool
- networkx
- pandas
- numpy
- jupyter
- pip
- matplotlib
- statsmodels
...@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, ...@@ -77,7 +77,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate,
if consensus: if consensus:
cami.reset_cami() cami.reset_cami()
if evaluate or (not consensus and not evaluate and not seed_variation): if evaluate and (consensus or seed_variation):
cami.make_evaluation() cami.make_evaluation()
# SEED VARIATION # SEED VARIATION
...@@ -122,7 +122,7 @@ if __name__ == "__main__": ...@@ -122,7 +122,7 @@ if __name__ == "__main__":
help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1") help="List of weights for the tools. If you have [domino, diamond, robust] as list of tools and diamonds weight should be twice as high as the other tools type: 1 2 1")
parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami") parser.add_argument('-c', '--consensus', action='store_true', help="run only the consensus prediction part of cami")
parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.") parser.add_argument('-var', '--seed_variation', action='store', help="repeat consensus selection multiple times (please provide the number of iterations) while removing 20 percent of the seeds.")
parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST") parser.add_argument('-e', '--evaluate', action='store_true', help="evaluation using DIGEST", default=False)
parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None) parser.add_argument('-o', '--output_dir', action='store', help="path to output directory", default=None)
parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID") parser.add_argument('-id', '--identifier', action='store', help="ID for the current excecution of cami. Defaults to a randomly generated ID")
parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files") parser.add_argument('-tmp', '--save_temps', action='store_true', help="keep all temporary files")
......
...@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt ...@@ -12,7 +12,6 @@ import matplotlib.pyplot as plt
import itertools import itertools
def generate_param_combinations(params_dict): def generate_param_combinations(params_dict):
""" """
Generates all possible combinations of parameters for the given function(s) and returns them as a list. Generates all possible combinations of parameters for the given function(s) and returns them as a list.
...@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict): ...@@ -36,6 +35,7 @@ def generate_param_combinations(params_dict):
result.append([function_name, params_str, {'params': param_dict, 'function': function}]) result.append([function_name, params_str, {'params': param_dict, 'function': function}])
return result return result
def initialize_cami(path_to_ppi_file=''): def initialize_cami(path_to_ppi_file=''):
cami_params = {} cami_params = {}
# find homepath aka ~/cami # find homepath aka ~/cami
...@@ -91,7 +91,8 @@ class cami(): ...@@ -91,7 +91,8 @@ class cami():
consensus approach consensus approach
""" """
def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='', configuration='camiconf', def __init__(self, ppi_graph, seed_lst, tool_wrappers, home_path, initial_seed_lst, uid=None, output_dir='',
configuration='camiconf',
parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None): parallelization=False, ncbi=False, debug=False, save_temps=False, toolweights=None):
"""Instance variables of CAMI """Instance variables of CAMI
...@@ -268,7 +269,6 @@ class cami(): ...@@ -268,7 +269,6 @@ class cami():
# prefix=f'{result_set}_{self.uid}', # prefix=f'{result_set}_{self.uid}',
# file_type='png') # file_type='png')
def run_threaded_tool(self, tool, pred_sets): def run_threaded_tool(self, tool, pred_sets):
"""run a tool in one thread and save the results into a dictionary pred_sets """run a tool in one thread and save the results into a dictionary pred_sets
...@@ -347,7 +347,7 @@ class cami(): ...@@ -347,7 +347,7 @@ class cami():
# set of all result genes # set of all result genes
cami_scores = self.ppi_graph.vertex_properties["cami_score"] cami_scores = self.ppi_graph.vertex_properties["cami_score"]
predicted_by = self.ppi_graph.vertex_properties["predicted_by"] predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
consens_threshold = min(self.nof_tools, 2) # consens_threshold = min(self.nof_tools, 2)
ppi_graph = self.ppi_graph ppi_graph = self.ppi_graph
seed_list = self.seed_lst seed_list = self.seed_lst
tool_name_map = self.code2toolname tool_name_map = self.code2toolname
...@@ -358,8 +358,8 @@ class cami(): ...@@ -358,8 +358,8 @@ class cami():
result_sets[tool] -= set(self.seed_lst) result_sets[tool] -= set(self.seed_lst)
params_tr = {'hub_penalty': [0.25], params_tr = {'hub_penalty': [0.25],
'damping_factor': [0.7], 'damping_factor': [0.75],
'confidence_level': [0.5], 'confidence_level': [0.8],
'ranking': ['trustrank'], 'ranking': ['trustrank'],
'function': {'cami_v3': cami_v3.run_cami}} 'function': {'cami_v3': cami_v3.run_cami}}
...@@ -413,7 +413,8 @@ class cami(): ...@@ -413,7 +413,8 @@ class cami():
sys.setrecursionlimit(recursion_limit) sys.setrecursionlimit(recursion_limit)
# save the results in outputfiles # save the results in outputfiles
if save_output: if save_output:
self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices,
cami_genes,
gene_name_map, codes2tools, cami_scores) gene_name_map, codes2tools, cami_scores)
# add seeds to result sets for drugstone and digest # add seeds to result sets for drugstone and digest
...@@ -423,7 +424,6 @@ class cami(): ...@@ -423,7 +424,6 @@ class cami():
print( print(
f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes') f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes')
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, cami_scores): gene_name_map, codes2tools, cami_scores):
# save all predictions by all tools # save all predictions by all tools
......
...@@ -4,7 +4,6 @@ import graph_tool as gt ...@@ -4,7 +4,6 @@ import graph_tool as gt
from utils.networks import trustrank, betweenness, must, closeness from utils.networks import trustrank, betweenness, must, closeness
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically? # TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params):
...@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -34,6 +33,25 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
weights = subnet.new_edge_property("double") weights = subnet.new_edge_property("double")
for v, c in counts.items(): for v, c in counts.items():
weights.a[int(v)] = c weights.a[int(v)] = c
# sum = 0
# TODO idea for more sophisticated weighting: but then as prior for trustrank and not as weights
# for v in subnet.vertices():
# c = counts[v]
# min_dist = None
# for seed in seed_lst:
# dist = gt.topology.shortest_distance(ppi_graph, seed, v)
# if dist == 2147483647:
# dist = 10
# if min_dist is None:
# min_dist = dist
# else:
# min_dist = min(min_dist, dist)
# min_dist /= len(seed_lst)
# score = c + (10-min_dist)
# sum += score
# weights.a[int(v)] = c
# for v in subnet.vertices():
# weights.a[int(v)] /= sum
if ranking_method == 'trustrank': if ranking_method == 'trustrank':
damping_factor = params['damping_factor'] damping_factor = params['damping_factor']
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights) scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
...@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -44,8 +62,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
elif ranking_method == 'harmonic': elif ranking_method == 'harmonic':
scores = closeness(subnet, hub_penalty, weights) scores = closeness(subnet, hub_penalty, weights)
putative_scores = scores.a[[int(id) for id in putative_vertices]] putative_scores = list(scores.a[[int(id) for id in putative_vertices]])
putative_scores.sort() putative_scores.sort()
putative_scores.reverse()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices: for v in putative_vertices:
......
...@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t ...@@ -57,6 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t
pass pass
putative_scores = list(putative_score_map.values()) putative_scores = list(putative_score_map.values())
putative_scores.sort() putative_scores.sort()
putative_scores.reverse()
threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))]
for v in putative_vertices: for v in putative_vertices:
if putative_score_map[v] >= threshold and putative_score_map[v] > 0: if putative_score_map[v] >= threshold and putative_score_map[v] > 0:
......
...@@ -8,6 +8,7 @@ import subprocess ...@@ -8,6 +8,7 @@ import subprocess
chdir((sys.argv[0].rsplit('/', 1))[0]) chdir((sys.argv[0].rsplit('/', 1))[0])
networkfile = "../data/input/networks/example_network.tsv" networkfile = "../data/input/networks/example_network.tsv"
seedfile = "../data/input/seeds/example_seeds.txt" seedfile = "../data/input/seeds/example_seeds.txt"
seedfile = "../data/input/seeds/multiple_sclerosis.tsv"
identifier = "example_run" identifier = "example_run"
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p --f;' command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -p -f -v;'
subprocess.call(command, shell=True) subprocess.call(command, shell=True)
\ No newline at end of file
...@@ -10,7 +10,7 @@ import graph_tool.util as gtu ...@@ -10,7 +10,7 @@ import graph_tool.util as gtu
import itertools as it import itertools as it
def edge_weights(g, base_weigths, hub_penalty, inverse=False): def edge_weights(g, base_weigths, hub_penalty = 0, inverse=False):
avdeg = gts.vertex_average(g, "total")[0] avdeg = gts.vertex_average(g, "total")[0]
weights = g.new_edge_property("double", val=avdeg) weights = g.new_edge_property("double", val=avdeg)
if base_weigths is not None: if base_weigths is not None:
...@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10): ...@@ -237,13 +237,15 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
return score_prop return score_prop
def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None): def trustrank(g, seed_ids, damping_factor, hub_penalty=0, weights=None):
if gt.openmp_enabled(): if gt.openmp_enabled():
gt.openmp_set_num_threads(6) gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True) weights = edge_weights(g, weights, hub_penalty, inverse=False)
# Call graph-tool to compute TrustRank. # Call graph-tool to compute TrustRank.
trust = g.new_vertex_property("double") trust = g.new_vertex_property("double")
trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids) trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights) scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
# Compute and return the results. # Compute and return the results.
return scores return scores
......
#ID
entrez.57044
entrez.326
entrez.8691
entrez.3710
entrez.100303715
entrez.8245
entrez.3569
entrez.3559
entrez.3630
entrez.3403
entrez.3402
entrez.3407
entrez.3405
entrez.3406
entrez.3414
entrez.3412
entrez.3410
entrez.100271697
entrez.387082
entrez.55315
entrez.6927
entrez.1493
entrez.1234
#ID
entrez.208
entrez.9479
entrez.8660
entrez.169026
entrez.4813
entrez.4760
entrez.4544
entrez.10644
entrez.3990
entrez.3767
entrez.7466
entrez.3569
entrez.3630
entrez.3667
entrez.3651
entrez.3172
entrez.3159
entrez.2820
entrez.2645
entrez.50982
entrez.11132
entrez.6514
entrez.6833
entrez.6934
entrez.6928
entrez.6927
entrez.5078
entrez.5167
entrez.9882
entrez.56729
entrez.5770
entrez.100188782
entrez.5468
entrez.5506
#ID Name
entrez.7076 TIMP1
entrez.3119 HLA-DQB1
entrez.7040 TGFB1
entrez.712 C1QA
entrez.713 C1QB
entrez.718 C3
entrez.3078 CFHR1
entrez.717 C2
entrez.10878 CFHR3
entrez.64231 MS4A6A
entrez.10855 HPSE
entrez.29015 SLC43A3
entrez.3903 LAIR1
entrez.3075 CFH
entrez.1670 DEFA5
entrez.10253 SPRY2
entrez.8526 DGKE
entrez.10610 ST6GALNAC2
entrez.10184 LHFPL2
entrez.9212 AURKB
entrez.183 AGT
entrez.960 CD44
entrez.7805 LAPTM5
entrez.56992 KIF15
entrez.9055 PRC1
entrez.10461 MERTK
entrez.2305 FOXM1
entrez.7157 TP53
entrez.22909 FAN1
entrez.64170 CARD9
entrez.1636 ACE
entrez.81494 CFHR5
entrez.335 APOA1
entrez.4125 MAN2B1
entrez.10451 VAV3
entrez.2548 GAA
entrez.8741 TNFSF13
entrez.4627 MYH9
entrez.23603 CORO1C
entrez.213 ALB
entrez.101154649 entrez.101154649
entrez.1880 GPR183
entrez.3123 HLA-DRB1
entrez.9332 CD163
entrez.60498 entrez.60498
entrez.11326 VSIG4
entrez.1286 COL4A4
entrez.5328 PLAU
entrez.3117 HLA-DQA1
entrez.1285 COL4A3
entrez.100653384 entrez.100653384
entrez.1287 COL4A5
#ID
entrez.57096
entrez.23568
entrez.145226
entrez.4647
entrez.10083
entrez.64072
entrez.124590
entrez.25861
entrez.2527
entrez.6103
entrez.6121
entrez.65217
entrez.79883
entrez.5015
entrez.5631
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment