cami_suite.py
    import threading, biodigest, os
    from utils import drugstone, degradome, ncbi
    from algorithms.DiamondWrapper import DiamondWrapper
    from algorithms.DominoWrapper import DominoWrapper
    from algorithms.RobustWrapper import RobustWrapper
    from configparser import ConfigParser
    import preprocess
    from consensus import cami_v1, cami_v2, cami_v3
    
    
    def list_combinations(lst, k):
        """creates all possible combinations of length k with two objects in a list
    
        :param lst: a list with length 2
        :type lst: list()
        :param k: length of the combinations
        :type k: int
        """
        nof_combs = 2 ** k
        l = nof_combs // 2
        columns = []
        while l >= 1:
            # build one column by alternating blocks of lst[0] and lst[1] of length l,
            # like one bit position of a binary counter
            column = []
            while len(column) < nof_combs:
                for _ in range(l):
                    column.append(lst[0])
                for _ in range(l):
                    column.append(lst[1])
            columns.append(column)
            l //= 2
        combs = [tuple(column[i] for column in columns) for i in range(nof_combs)]
        assert len(set(combs)) == nof_combs
        return combs
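
    # Example (illustrative, not part of the original source): with two tools 'A' and 'B',
    # list_combinations(['A', 'B'], 2) returns the four ordered tuples
    # [('A', 'A'), ('A', 'B'), ('B', 'A'), ('B', 'B')].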
    
    def initialize_cami(path_to_ppi_file=''):
        cami_params = {}
        # find homepath aka ~/cami
        current_wd = os.getcwd()
        current_wd_lst = current_wd.rsplit('/', 1)
        current_folder = current_wd_lst[-1]
        while current_folder != 'cami':
            os.chdir(current_wd_lst[0])
            current_wd = os.getcwd()
            current_wd_lst = current_wd.rsplit('/', 1)
            current_folder = current_wd_lst[-1]
        home_path = current_wd
        cami_params['home_path'] = home_path
        cami_source = os.path.join(home_path, 'cami_src')
        cami_params['cami_src_path'] = cami_source
        
        # initialize tool wrappers
        diamond = DiamondWrapper()
        domino = DominoWrapper()
        robust = RobustWrapper()
        wrappers = [diamond, domino, robust]
        nof_tools = len(wrappers)
        cami_params['tool_wrappers'] = wrappers
        
        # preprocessing
        if path_to_ppi_file == '':
            ppi_network = 'example_network.tsv'
            ppi_file = os.path.join(home_path, f'data/input/networks/{ppi_network}')
        else:
            ppi_file = path_to_ppi_file
            
        symbol_columns = [] # if the two symbol columns in the ppi_network file are not named
                                # ['Official_Symbol_Interactor_A', 'Official_Symbol_Interactor_B']
                                # provide the names here.
        ppi_graph = preprocess.csv2graph(ppi_file, symbol_columns, nof_tools)
        
        cami_params['ppi_graph'] = ppi_graph
        
        # dictionary with name of the seeds and file 
        seed_directory = os.path.join(home_path, 'data/input/seeds')
        seed_files = {'adhd': 'adhd.tsv',
                      'alcl': 'alcl.tsv',
                      'joubert': 'joubert_syndrome.tsv'}
        seed_paths = {}
        for seed_file in seed_files:
            seed_paths[seed_file] = os.path.join(seed_directory, seed_files[seed_file])
    
        seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths}
        cami_params['seed_lists'] = seed_lists
        return cami_params
    
        
    
    class cami():
        """ A module that is used for Active Module identifaction based on a
            consensus approach
        """
        def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', seed_score=10, parallelization=False):
            """Instance variables of CAMI
    
            :param ppi_graph: The PPI graph on which all predictions in CAMI are based
            :type ppi_graph: Graph()
            :param seed_lst: A list of vertices that are the seeds for the predictions
            :type seed_lst: list(Graph().vertex())
            :param tool_wrappers: A list of AlgorithmWrapper() objects that correspond to the tools used for the predictions
            :type tool_wrappers: list(AlgorithmWrapper())
            :param output_dir: The path to the directory where the results are supposed to be saved
            :type output_dir: str
            :param uid: Identifier for the current execution of CAMI
            :type uid: str
            :param home_path: Path to the cami home directory (gitlab repository)
            :type home_path: str
            :param tmp_dir: Directory where temporary files should be saved
            :type tmp_dir: str
            :param config: Name of the configuration file to read (default: 'camiconf')
            :type config: str
            :param seed_score: Score assigned to seed vertices in the PPI graph
            :type seed_score: int
            :param parallelization: Whether the tools should be run in parallel threads
            :type parallelization: bool
            """
            self.ppi_graph = ppi_graph
            self.origin_ppi_graph = ppi_graph.copy()
            self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"]
            self.ppi_gene2vertex = {self.ppi_vertex2gene[vertex]:vertex for vertex in self.ppi_graph.vertices()}
            self.initial_seed_lst = None
            self.seed_lst = seed_lst
            self.origin_seed_lst = seed_lst.copy()
            self.tool_wrappers = tool_wrappers
            self.output_dir = output_dir
            self.home_path = home_path
            self.uid = str(uid)
            if tmp_dir == '':
                tmp_dir = os.path.join(home_path, 'data', 'tmp', self.uid)
            self.tmp_dir = tmp_dir
            
            self.nof_tools = len(tool_wrappers)
            self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices)
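            # map numeric tool codes to tool names; code 0 is reserved for the CAMI consensus itself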
            self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers}
            self.code2toolname[0] = 'CAMI'
            self.cami_vertices = []
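            # whether to query NCBI for gene URLs and summaries when writing the CAMI output files
            # (see generate_output)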
            self.ncbi = False
            
            # read the configuration from the file given via the config parameter (default: 'camiconf')
            config_parser = ConfigParser()
            config_parser.read(config)
            self.seed_score = config_parser.get('cami', 'seed_score')
            self.config = config_parser
            self.threaded = parallelization
            # set weights for seed genes in ppi_graph
            for seed in self.seed_lst:
                self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score
        
        def reset_cami(self, new_uid='', change_tmp=False):
            if new_uid != '':
                self.uid = new_uid
            if change_tmp:
                new_tmp_dir = os.path.join(self.home_path, 'data', 'tmp', self.uid)
                os.makedirs(new_tmp_dir, exist_ok=True)
                self.tmp_dir = new_tmp_dir
            self.ppi_graph = self.origin_ppi_graph.copy()
            self.result_gene_sets = {}
            self.cami_vertices = []
            self.seed_lst = self.origin_seed_lst.copy()
    
        def set_initial_seed_lst(self, seedlst):
            self.initial_seed_lst = seedlst
    
        def initialize_tool(self, tool):
            tool.set_ppi_network(self.ppi_graph)
            tool.set_seeds(self.seed_lst)
            tool.set_homepath(self.home_path)
            tool.set_id(self.uid)
            tool.set_config(self.config)
            
        def initialize_all_tools(self):
            for tool in self.tool_wrappers:
                self.initialize_tool(tool)
    
        def run_tool(self, tool):
            """Excecute the predictions using the AlgorithmWrapper() of a tool
    
            :param tool: A tool that has the following methods: prepare_input, run_tool() and extract_output()
            :type tool: AlgorithmWrapper()
            :return: A set of indices of the vertices predicted by the used tool
            :rtype: set()
            """
            tool.create_tmp_output_dir(self.tmp_dir) # creates the temporary output directory
            print(f"preparing {tool.name} input...")
            inputparams = tool.prepare_input()
            print(f'running {tool.name}...')
            preds = set(tool.run_algorithm(inputparams))
            print(f'{tool.name} predicted {len(preds)} active vertices (seeds not excluded):')
            print(preds)
            return preds
    
        def make_evaluation(self):
            print(self.result_gene_sets)
            biodigest.setup.main(setup_type="api")
            for result_set in self.result_gene_sets:
                validation_results = biodigest.single_validation.single_validation(
                    tar=set(self.result_gene_sets[result_set]),
                    tar_id='entrez',
                    mode='set-set',
                    distance='jaccard',
                    ref=set(self.seed_lst), 
                    ref_id='entrez')
                if validation_results['status'] == 'ok':  
                    biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir)
                    biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results, 
                                 mode='set-set', 
                                 tar=set(self.result_gene_sets[result_set]), 
                                 tar_id='entrez', 
                                 out_dir=self.output_dir, 
                                 prefix=f'{result_set}_{self.uid}')
        def run_threaded_tool(self, tool, pred_sets):
            """run a tool in one thread and save the results into a dictionary pred_sets
    
            Args:
                tool (AlgorithmWrapper): Wrapper class for a tool
                pred_sets (dict): a dictionary that maps a tool to its result set
            """
            preds = self.run_tool(tool)
            pred_sets[tool] = preds #- seed_set
    
        def make_predictions(self) -> dict:
            """create all predictions using the tools specified in tool_wrappers
    
            :return: A dictionary that saves the predicted vertices with respect
                     to the corresponding tool
            :rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
            """
            print(f'Creating result sets of all {self.nof_tools} tools...')
            pred_sets = {tool:None for tool in self.tool_wrappers}
            
            if self.threaded:
                threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,)) 
                        for tool in self.tool_wrappers]
                for thread in threads:
                    thread.start()
                    
                for thread in threads:
                    thread.join()
            else:
                for tool in self.tool_wrappers:
                    pred_sets[tool] = self.run_tool(tool)
            
            assert(list(pred_sets.values()).count(None) < 1)
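            # translate the raw vertex indices returned by the tools back into vertex objects of the PPI graph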
            result_sets = {tool:set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]])
                           for tool in pred_sets}
            return result_sets
    
        def take_custom_results(self, inputfiles, result_sets=None):
            """Takes a dictionary of input files and extracts the results from them to
               include them in the consensus with the tools of CAMI

            :param inputfiles: A dictionary with the following properties:
                               key: the used tool (AlgorithmWrapper())
                               value: path to a result file of that tool
                               (first line: tool name, afterwards one gene per line)
            :type inputfiles: dict
            :return: A dictionary that saves the predicted vertices with respect
                     to the corresponding tool
            :rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
            """
            if result_sets is None:
                result_sets = {}
            for tool in inputfiles:
                result_list = []
                with open(inputfiles[tool]) as rfile:
                    for idx, line in enumerate(rfile):
                        if idx == 0:
                            tool.name = line.strip()
                            self.code2toolname[tool.code] = tool.name
                        else:
                            node = line.strip()
                            if node in self.ppi_gene2vertex:
                                result_list.append(self.ppi_gene2vertex[node])
                    result_sets[tool] = set(result_list)
            return result_sets
    
        def create_consensus(self, result_sets):
            """takes a set of active module predictions and creates a consensus
               that combines all the results of the different tools.
    
            :param result_sets: A dictionary with the following properties:
                              key: The used tool as AlgorithmWrapper() Object
                              values: Set of vertices in the ppi_graph that were
                                      predicted by the key-tool
            :type result_sets: {AlgorithmWrapper(): {Graph().vertex()}}
            """
            # calculate gene weights
            # set of all result genes 
            cami_scores = self.ppi_graph.vertex_properties["cami_score"]
            predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
            consens_threshold = min(self.nof_tools, 2)
            ppi_graph = self.ppi_graph
            seed_list = self.seed_lst
            tool_name_map = self.code2toolname
            gene_name_map = self.ppi_vertex2gene
    
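            # consensus variants to run: cami_v1, cami_v2 and cami_v3 with different parameter presets;
            # the suffixes _tr, _bc and _m refer to the 'trustrank', 'betweenness' and 'must' ranking options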
            camis = {
                'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
                'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
                    'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
                }},
                'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': {
                    'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
                }},
                'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': {
                    'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
                }},
                'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5
                }},
                'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'ranking': 'must',
                }},
                'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
                }},
                'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75
                }},
                'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness'
                }},
                'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must'
                }},
                'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'trustrank'
                }},
                'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness'
                }},
                'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must'
                }},
                'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': {
                    'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15
                }},
            }
    
            for cami_method_name, cami_params in camis.items():
                print("Running " + cami_method_name)
                cami_vertices, putative_vertices, codes2tools = cami_params['function'](result_sets, ppi_graph, seed_list,
                                                                                        predicted_by, cami_scores,
                                                                                        tool_name_map,
                                                                                        cami_params['params'])
    
                # sort the resulting vertices according to their cami_score
                cami_vlist = sorted(cami_vertices, key=lambda v: cami_scores[v], reverse=True)
    
                seed_genes = [self.ppi_vertex2gene[seed_vertex] for seed_vertex in seed_list]
                # translate the resulting vertex() ids to the corresponding names in the ppi network
                cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vlist]
    
                print(f'With the given seed genes: {seed_genes} \n' +
                      f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):')
                for vertex in cami_vlist:
                    print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
                # for visualization
                self.result_gene_sets[cami_method_name] = cami_genes
    
                if cami_method_name == 'cami_v1':
                    # for drugstone
                    self.cami_vertices = cami_vlist
    
                # save the results in outputfiles
                self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                                     gene_name_map, codes2tools, result_sets, cami_scores)
    
        def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
                                gene_name_map, codes2tools, result_sets, cami_scores):
            # save all predictions by all tools
            print('Saving the results...')
            with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
                outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+
                                 f'initially: {self.initial_seed_lst}\n')
                outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n')
                all_vertices = cami_vertices.union(putative_vertices)
                for vertex in all_vertices:
                    outputfile.write(f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
            print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
    
            # save the predictions made by cami
            ncbi_url = ('\tncbi_url' if self.ncbi else '')
            ncbi_summary = ('\tncbi_summary' if self.ncbi else '')
    
            with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile:
                outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n')     
                for vertex in cami_vlist:
                    if self.ncbi:
                        url, summary = ncbi.send_request(gene_name_map[vertex])
                        url = '\t' + url
                        if summary is not None:
                            summary = '\t' + summary
                        else:
                            summary = ''
                    else:
                        url, summary = '',''
                    outputfile.write(f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
            
            # save the whole module
            whole_module = []
            with open(f'{self.output_dir}/CAMI_module_{cami_method}_{self.uid}.txt', 'w') as modfile:
                for gene in seed_genes:
                    modfile.write(f'{gene}\n')
                    whole_module.append(gene)
                for gene in cami_genes:
                    modfile.write(f'{gene}\n')
                    whole_module.append(gene)

            print(f'saved cami output in: {self.output_dir}/CAMI_output_{self.uid}.tsv')
            print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/CAMI_module_{cami_method}_{self.uid}.txt')
            
            # transform all vertex indices to their corresponding gene names in a result set
            for tool in result_sets:
                self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]])
            
            # save predictions by the other tools
            for tool in self.result_gene_sets:
                with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile:
                    outputfile.write('gene\n')
                    for gene in self.result_gene_sets[tool]:
                        outputfile.write(f'{gene}\n')
                print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
                
            # for drugstone
            self.cami_vertices = cami_vlist
            
            # return values
            consensus = {}
            consensus['module'] = whole_module
            consensus['seeds'] = self.seed_lst
            return consensus
    
    
        def use_nvenn(self):
            """Create Venn Diagrams via a external tool named degradome.
               Sends a request via requests to the degradome server.
               Returns the URL of the result.
            """
            # visualize with degradome
            if self.nof_tools < 7:
                print('Visualizing results using Degradome...')
                degradome_sets = {tool:self.result_gene_sets[tool] 
                                  for tool in self.result_gene_sets 
                                  if len(self.result_gene_sets[tool])>0}
                url = degradome.send_request(degradome_sets, {self.ppi_vertex2gene[seed] for seed in self.seed_lst})
                with open(f'{self.output_dir}/venn_link_{self.uid}.txt', 'w') as f:
                    f.write(url)
                return url
    
            # elif nof_tools == 6:
            #     print('Visualizing using Degradome...(seeds excluded from results)')
            #     # degradome_sets = result_sets.copy()
            #     # degradome_sets['CAMI'] = set(result_genes)
            #     url = degradome.send_request(degradome_sets)
            #     webbrowser.open(url)
            else:
                print('Cannot use degradome to create Venn diagrams of 7 or more tools')
    
        def download_diagram(self, url):
            venn_name = f'{self.output_dir}/vdiagram_{self.uid}'
            response = degradome.download_image(url, venn_name + '.png')
            if response is not None:
                with open(f'{venn_name}.html', 'w') as r:
                    r.write(response.html.html)
    
        def use_drugstone(self):
            symbol = self.ppi_graph.vertex_properties["symbol"]
            cami_module = self.cami_vertices + self.seed_lst
            cami_symbols = [symbol[vertex] for vertex in cami_module]
            cami_symbol_edges = []
            
            for vertex in self.cami_vertices:
                for edge in vertex.all_edges():
                    cami_symbol_edges.append((symbol[edge.source()], symbol[edge.target()]))
            #print(list(set(cami_symbol_edges)))
            url = drugstone.send_request(cami_symbols, cami_symbol_edges)
            print(f'You can find a network visualization of the CAMI module via: {url}')
            print('The link was also saved in the outputfolder for later.')
            with open(f'{self.output_dir}/drugstone_link_{self.uid}.txt', 'w') as f:
                f.write(url)
            return url
    
        def remove_seeds(self, idx_lst):
            """remove seeds at indices idx
    
            Args:
                idx_lst (lst): list of indices to be removed
            """
            removed_seeds = [self.seed_lst[idx] for idx in idx_lst]
            self.seed_lst = [seed for seed in self.seed_lst if seed not in removed_seeds]
            for seed in self.seed_lst:
                self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score
            for seed in removed_seeds:
                self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0
            return removed_seeds
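

    # ------------------------------------------------------------------
    # Minimal usage sketch (illustrative only, not part of the original module):
    # it wires the pieces above together the way initialize_cami() suggests.
    # Assumptions: the script is run from the cami repository root, the example
    # network and seed files exist at the paths used in initialize_cami(), the
    # uid and output directory are made up for this example, and the seed file
    # contains gene names matching the "name" vertex property of the PPI graph.
    if __name__ == '__main__':
        home = os.getcwd()
        ppi_file = os.path.join(home, 'data/input/networks/example_network.tsv')
        seed_file = os.path.join(home, 'data/input/seeds/adhd.tsv')
        output_dir = os.path.join(home, 'data/output/example_run')
        os.makedirs(output_dir, exist_ok=True)

        # build the PPI graph and the seed vertex list with the preprocessing helpers
        wrappers = [DiamondWrapper(), DominoWrapper(), RobustWrapper()]
        ppi_graph = preprocess.csv2graph(ppi_file, [], len(wrappers))
        seed_genes = preprocess.txt2lst(seed_file)
        seed_vertices = [v for v in ppi_graph.vertices()
                         if ppi_graph.vertex_properties["name"][v] in seed_genes]

        # run the full pipeline: tool predictions followed by the consensus
        suite = cami(ppi_graph, seed_vertices, wrappers, output_dir,
                     uid='example_run', home_path=home)
        suite.set_initial_seed_lst(seed_vertices.copy())
        suite.initialize_all_tools()
        result_sets = suite.make_predictions()
        suite.create_consensus(result_sets)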