diff --git a/cami_src/cami_suite.py b/cami_src/cami_suite.py index 7047a99bc0e1175fc63a4f9ba5b058b3f3936ae7..0dc71af42a773c7e722c884d2f9c688044454699 100644 --- a/cami_src/cami_suite.py +++ b/cami_src/cami_suite.py @@ -28,12 +28,13 @@ def list_combinations(lst, k): for _ in range(l): column.append(lst[1]) columns.append(column) - l = l/2 + l = l / 2 if l >= 1: - l=int(l) + l = int(l) combs = [tuple([column[i] for column in columns]) for i in range(nof_combs)] assert len(set(combs)) == nof_combs - return(combs) + return (combs) + def initialize_cami(path_to_ppi_file=''): cami_params = {} @@ -50,7 +51,7 @@ def initialize_cami(path_to_ppi_file=''): cami_params['home_path'] = home_path cami_source = os.path.join(home_path, 'cami_src') cami_params['cami_src_path'] = cami_source - + # initialize tool wrappers diamond = DiamondWrapper() domino = DominoWrapper() @@ -58,39 +59,40 @@ def initialize_cami(path_to_ppi_file=''): wrappers = [diamond, domino, robust] nof_tools = len(wrappers) cami_params['tool_wrappers'] = wrappers - + # preprocessing if path_to_ppi_file == '': ppi_network = 'example_network.tsv' ppi_file = os.path.join(home_path, f'data/input/networks/{ppi_network}') else: ppi_file = path_to_ppi_file - - symbol_columns = [] # if the two symbol columns in the ppi_network file are not named - # ['Official_Symbol_Interactor_A', 'Official_Symbol_Interactor_B'] - # provide the names here. + + symbol_columns = [] # if the two symbol columns in the ppi_network file are not named + # ['Official_Symbol_Interactor_A', 'Official_Symbol_Interactor_B'] + # provide the names here. ppi_graph = preprocess.csv2graph(ppi_file, symbol_columns, nof_tools) - + cami_params['ppi_graph'] = ppi_graph - + # dictionary with name of the seeds and file seed_directory = os.path.join(home_path, 'data/input/seeds') - seed_files = {'adhd':'adhd.tsv',\ - 'alcl':'alcl.tsv',\ - 'joubert':'joubert_syndrome.tsv'} + seed_files = {'adhd': 'adhd.tsv', \ + 'alcl': 'alcl.tsv', \ + 'joubert': 'joubert_syndrome.tsv'} seed_paths = {} for seed_file in seed_files: seed_paths[seed_file] = os.path.join(seed_directory, seed_files[seed_file]) - seed_lists = {seedname:preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths} + seed_lists = {seedname: preprocess.txt2lst(seed_paths[seedname]) for seedname in seed_paths} - class cami(): """ A module that is used for Active Module identifaction based on a consensus approach """ - def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', seed_score=10, parallelization=False): + + def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, home_path, tmp_dir='', config='camiconf', + seed_score=10, parallelization=False): """Instance variables of CAMI :param ppi_graph: The PPI-Graph on which all predictions in CAMI are based of @@ -108,11 +110,11 @@ class cami(): :param home_path: Path to the cami home directory (gitlab repository) :type home_path: str """ - self.debug = False + self.debug = False self.ppi_graph = ppi_graph self.origin_ppi_graph = ppi_graph.copy() self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"] - self.ppi_gene2vertex = {self.ppi_vertex2gene[vertex]:vertex for vertex in self.ppi_graph.vertices()} + self.ppi_gene2vertex = {self.ppi_vertex2gene[vertex]: vertex for vertex in self.ppi_graph.vertices()} self.initial_seed_lst = None self.seed_lst = seed_lst self.origin_seed_lst = seed_lst.copy() @@ -123,15 +125,15 @@ class cami(): if tmp_dir == '': tmp_dir = os.path.join(home_path, 'data', 'tmp', self.uid) self.tmp_dir = tmp_dir - + self.nof_tools = len(tool_wrappers) - self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices) WITHOUT seeds - self.result_module_sets = {} #contains the genes predicted by the tools (not the indices) WITH seeds - self.cami_module = [] # TODO: pick place where cami_module is set, which consensus approach should we use? - self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers} + self.result_gene_sets = {} # contains the genes predicted by the tools (not the indices) WITHOUT seeds + self.result_module_sets = {} # contains the genes predicted by the tools (not the indices) WITH seeds + self.cami_module = [] # TODO: pick place where cami_module is set, which consensus approach should we use? + self.code2toolname = {tool.code: tool.name for tool in self.tool_wrappers} self.code2toolname[0] = 'No tool' self.ncbi = False - + config = ConfigParser() config.read('camiconf') self.seed_score = config.get('cami', 'seed_score') @@ -140,14 +142,14 @@ class cami(): # set weights for seed genes in ppi_graph for seed in self.seed_lst: self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score - + def reset_cami(self, new_uid='', change_tmp=False): if not new_uid == '': self.uid = new_uid if change_tmp: new_tmp_dir = os.path.join(self.home_path, - 'data', - self.uid) + 'data', + self.uid) os.makedirs(new_tmp_dir) self.tmp_dir = new_tmp_dir self.ppi_graph = self.origin_ppi_graph.copy() @@ -155,7 +157,7 @@ class cami(): self.result_module_sets = {} self.cami_vertices = [] self.seed_lst = self.origin_seed_lst.copy() - self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers} + self.code2toolname = {tool.code: tool.name for tool in self.tool_wrappers} self.code2toolname[0] = 'No tool' def set_initial_seed_lst(self, seedlst): @@ -167,7 +169,7 @@ class cami(): tool.set_homepath(self.home_path) tool.set_id(self.uid) tool.set_config(self.config) - + def initialize_all_tools(self): for tool in self.tool_wrappers: self.initialize_tool(tool) @@ -180,7 +182,7 @@ class cami(): :return: A set of predicted vertices by the used tool :rtype: set() """ - tool.create_tmp_output_dir(self.tmp_dir) # creates the temporary input directory + tool.create_tmp_output_dir(self.tmp_dir) # creates the temporary input directory if self.debug: print(f"preparing {tool.name} input...") inputparams = tool.prepare_input() if self.debug: print(f'running {tool.name}...') @@ -198,16 +200,18 @@ class cami(): tar_id='entrez', mode='set-set', distance='jaccard', - ref=set(self.seed_lst), + ref=set(self.seed_lst), ref_id='entrez') - if validation_results['status'] == 'ok': - biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir) - biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results, - mode='set-set', - tar=set(self.result_module_sets[result_set]), - tar_id='entrez', - out_dir=self.output_dir, - prefix=f'{result_set}_{self.uid}') + if validation_results['status'] == 'ok': + biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', + self.output_dir) + biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results, + mode='set-set', + tar=set(self.result_module_sets[result_set]), + tar_id='entrez', + out_dir=self.output_dir, + prefix=f'{result_set}_{self.uid}') + def run_threaded_tool(self, tool, pred_sets): """run a tool in one thread and save the results into a dictionary pred_sets @@ -216,7 +220,7 @@ class cami(): pred_sets (dict): a dictionary that maps a tool to its result set """ preds = self.run_tool(tool) - pred_sets[tool] = preds #- seed_set + pred_sets[tool] = preds # - seed_set def make_predictions(self) -> dict: """create all predictions using the tools specified in tool_wrappers @@ -227,22 +231,22 @@ class cami(): """ if self.debug: print(f'Creating result sets of all {self.nof_tools} tools...') - pred_sets = {tool:None for tool in self.tool_wrappers} - + pred_sets = {tool: None for tool in self.tool_wrappers} + if self.threaded: - threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,)) - for tool in self.tool_wrappers] + threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,)) + for tool in self.tool_wrappers] for thread in threads: thread.start() - + for thread in threads: thread.join() else: for tool in self.tool_wrappers: pred_sets[tool] = self.run_tool(tool) - - assert(list(pred_sets.values()).count(None) < 1) - result_sets = {tool:set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]]) + + assert (list(pred_sets.values()).count(None) < 1) + result_sets = {tool: set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]]) for tool in pred_sets} return result_sets @@ -291,58 +295,57 @@ class cami(): seed_list = self.seed_lst tool_name_map = self.code2toolname gene_name_map = self.ppi_vertex2gene - + # remove seeds from result sets for tool in result_sets: result_sets[tool] -= set(self.seed_lst) + params = {'hub_pentalty': [0, 0.25, 0.5, 0.75, 1.0], 'damping_factor': [0.1, 0.25, 0.5, 0.75], 'confidence_level': [0.2, 0.35, 0.5, 0.75], 'ranking':["trustrank", "betweenness", "harmonic"], 'function':[cami_v2.run_cami, cami_v3.run_cami]} + camis = { 'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}}, 'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': { - 'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75 + 'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5 }}, - 'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': { - 'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness' + 'cami_v2_param1_b': {'function': cami_v2.run_cami, 'params': { + 'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5 }}, - 'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': { - 'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must' + 'cami_v2_param1_hc': {'function': cami_v2.run_cami, 'params': { + 'hub_penalty': 0.3,'ranking': 'harmonic', 'confidence_level': 0.5 }}, 'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': { 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5 }}, - 'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': { - 'hub_penalty': 0, 'damping_factor': 0.7, 'ranking': 'must', + 'cami_v2_param2_b': {'function': cami_v2.run_cami, 'params': { + 'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5 }}, - 'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': { - 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness' + 'cami_v2_param2_hc': {'function': cami_v2.run_cami, 'params': { + 'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5 }}, 'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75 + 'hub_penalty': 0.3, 'damping_factor': 0.7, 'confidence_level': 0.5 }}, - 'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'betweenness' + 'cami_v3_param1_b': {'function': cami_v3.run_cami, 'params': { + 'hub_penalty': 0.3, 'ranking': 'betweenness', 'confidence_level': 0.5 }}, - 'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0.8, 'damping_factor': 0.5, 'confidence_level': 0.75, 'ranking': 'must' + 'cami_v3_param1_hc': {'function': cami_v3.run_cami, 'params': { + 'hub_penalty': 0.3, 'ranking': 'harmonic', 'confidence_level': 0.5 }}, 'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'trustrank' - }}, - 'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'betweenness' + 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5 }}, - 'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must' + 'cami_v3_param2_b': {'function': cami_v3.run_cami, 'params': { + 'hub_penalty': 0, 'ranking': 'betweenness', 'confidence_level': 0.5 }}, - 'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': { - 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15 + 'cami_v3_param2_hc': {'function': cami_v3.run_cami, 'params': { + 'hub_penalty': 0, 'ranking': 'harmonic', 'confidence_level': 0.5 }}, } - + # transform all vertex indices to their corresponding gene names in a result set for tool in result_sets: self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]]) - + # create integer codes for cami_versions (needed for predicted_by vertex property) recursion_limit = sys.getrecursionlimit() for cami_method_name, cami_params in camis.items(): @@ -351,11 +354,11 @@ class cami(): # create integer codes for cami_versions (needed for predicted_by vertex property) tool_code = max(list(tool_name_map.keys())) + 1 tool_name_map[tool_code] = cami_method_name - + cami_vertices, putative_vertices, codes2tools = cami_params['function'](result_sets, ppi_graph, seed_list, - predicted_by, cami_scores, - tool_name_map, tool_code, - cami_params['params']) + predicted_by, cami_scores, + tool_name_map, tool_code, + cami_params['params']) # sort the resulting vertices according to their cami_score cami_vlist = sorted(cami_vertices, key=lambda v: cami_scores[v], reverse=True) @@ -365,24 +368,27 @@ class cami(): if self.debug: print(f'With the given seed genes: {seed_genes} \n' + - f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):') + f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):') for vertex in cami_vlist: print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}') else: - print(f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module') - + print( + f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(cami_vlist)} genes to add to the Active Module') + # for visualization with nvenn self.result_gene_sets[cami_method_name] = set(cami_genes) - + sys.setrecursionlimit(recursion_limit) # save the results in outputfiles self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, gene_name_map, codes2tools, cami_scores) - - # add seeds to result sets for drugstone and digest + + # add seeds to result sets for drugstone and digest for toolname in self.result_gene_sets: - self.result_module_sets[toolname] = self.result_gene_sets[toolname].union(set([gene_name_map[svertex] for svertex in self.seed_lst])) - print(f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes') + self.result_module_sets[toolname] = self.result_gene_sets[toolname].union( + set([gene_name_map[svertex] for svertex in self.seed_lst])) + print( + f'With the {len(seed_genes)} seed genes the module predicted by {toolname} contains {len(self.result_module_sets[toolname])} genes') def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, gene_name_map, codes2tools, cami_scores): @@ -390,20 +396,23 @@ class cami(): if self.debug: print('Saving the results...') with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile: - outputfile.write(f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n'+ - f'initially: {self.initial_seed_lst}\n') + outputfile.write( + f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n' + + f'initially: {self.initial_seed_lst}\n') outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n') all_vertices = cami_vertices.union(putative_vertices) for vertex in all_vertices: - outputfile.write(f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n') - if self.debug: print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv') + outputfile.write( + f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n') + if self.debug: print( + f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv') # save the predictions made by cami ncbi_url = ('\tncbi_url' if self.ncbi else '') ncbi_summary = ('\tncbi_summary' if self.ncbi else '') with open(f'{self.output_dir}/{cami_method}_output_{self.uid}.tsv', 'w') as outputfile: - outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n') + outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n') for vertex in cami_vlist: if self.ncbi: url, summary = ncbi.send_request(gene_name_map[vertex]) @@ -413,9 +422,10 @@ class cami(): else: summary = '' else: - url, summary = '','' - outputfile.write(f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n') - + url, summary = '', '' + outputfile.write( + f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n') + # # save the whole module # whole_module = [] # with open(f'{self.output_dir}/{cami_method}_module_{self.uid}.txt', 'w') as modfile: @@ -428,8 +438,7 @@ class cami(): # print(f'saved {cami_method} output in: {cami_method}_output_{self.uid}.tsv') # print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/{cami_method}_module_{self.uid}.txt') - - + # save predicted modules by all other tools for tool in self.result_module_sets: with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile: @@ -439,7 +448,6 @@ class cami(): if self.debug: print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv') - def use_nvenn(self): """Create Venn Diagrams via a external tool named degradome. Sends a request via requests to the degradome server. @@ -448,9 +456,9 @@ class cami(): # visualize with degradome if self.nof_tools < 7: print('Visualizing results using Degradome...') - degradome_sets = {tool:self.result_gene_sets[tool] - for tool in self.result_gene_sets - if len(self.result_gene_sets[tool])>0} + degradome_sets = {tool: self.result_gene_sets[tool] + for tool in self.result_gene_sets + if len(self.result_gene_sets[tool]) > 0} url = degradome.send_request(degradome_sets) with open(f'{self.output_dir}/venn_link_{self.uid}.txt', 'w') as f: f.write(url) @@ -479,11 +487,11 @@ class cami(): cami_symbols = [symbol[vertex] for vertex in cami_module] cami_symbols.append cami_symbol_edges = [] - + for vertex in self.cami_vertices: for edge in vertex.all_edges(): cami_symbol_edges.append((symbol[edge.source()], symbol[edge.target()])) - #print(list(set(cami_symbol_edges))) + # print(list(set(cami_symbol_edges))) url = drugstone.send_request(cami_symbols, cami_symbol_edges) print(f'You can find a network visualization of the CAMI module via: {url}') with open(f'{self.output_dir}/drugstone_link_{self.uid}.txt', 'w') as f: @@ -503,4 +511,3 @@ class cami(): for seed in removed_seeds: self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0 return removed_seeds - diff --git a/cami_src/consensus/cami_v2.py b/cami_src/consensus/cami_v2.py index f8cba3c137c06c1643b1ed44c135b9062aaea770..72a02f3b99f4ed0cf7706b07ee8f535e8952efea 100644 --- a/cami_src/consensus/cami_v2.py +++ b/cami_src/consensus/cami_v2.py @@ -1,7 +1,7 @@ import sys from collections import defaultdict import graph_tool as gt -from utils.networks import trustrank, betweenness, must +from utils.networks import trustrank, betweenness, must, closeness @@ -38,12 +38,15 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t if ranking_method == 'trustrank': scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights) elif ranking_method == 'betweenness': - scores = betweenness(subnet, seed_lst, hub_penalty, weights) + scores = betweenness(subnet, hub_penalty, weights) elif ranking_method == 'must': scores = must(subnet, seed_lst, 5, hub_penalty, weights, trees) + elif ranking_method == 'harmonic': + scores = closeness(subnet, hub_penalty, weights) putative_scores = scores.a[[int(id) for id in putative_vertices]] putative_scores.sort() + threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] for v in putative_vertices: if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0: diff --git a/cami_src/consensus/cami_v3.py b/cami_src/consensus/cami_v3.py index aa233fd6dfbbed2cbc8ecf8a7cac9744110bcf3d..aa6522e6bceb58f70bd4d672afaab27cd9fcd6cf 100644 --- a/cami_src/consensus/cami_v3.py +++ b/cami_src/consensus/cami_v3.py @@ -1,18 +1,19 @@ import sys from collections import defaultdict -from utils.networks import trustrank, betweenness, must +from utils.networks import trustrank, betweenness, must, closeness import graph_tool as gt + # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries # TODO maybe find a smart way to cutoff automatically? def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): damping_factor = params['damping_factor'] hub_penalty = params['hub_penalty'] - confidence_level = params['confidence_level'] + confidence_level = params.get('confidence_level', 0.5) weighted = 'weighted' in params and params['weighted'] ranking_method = params['ranking'] if 'ranking' in params else 'trustrank' - trees = params.get('trees',5) - tolerance = params.get('tolerance',10) + trees = params.get('trees', 5) + tolerance = params.get('tolerance', 10) # calculate gene weights # set of all result genes @@ -40,9 +41,11 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t if ranking_method == 'trustrank': scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights) elif ranking_method == 'betweenness': - scores = betweenness(subnet, seed_lst, hub_penalty, weights) + scores = betweenness(subnet, hub_penalty, weights) elif ranking_method == 'must': scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance) + elif ranking_method == 'harmonic': + scores = closeness(subnet, hub_penalty, weights) tool_scores[tool] = scores putative_score_map = defaultdict(lambda: 0) @@ -54,7 +57,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t pass putative_scores = list(putative_score_map.values()) putative_scores.sort() - threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] + threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] for v in putative_vertices: if putative_score_map[v] >= threshold and putative_score_map[v] > 0: cami_vertices.add(v) diff --git a/cami_src/example_run.py b/cami_src/example_run.py index d2201f0dd01928b2b2df2359eb4b73034a60db12..ed3ed87b932cd79ed53552806d82fff9a8ba8346 100755 --- a/cami_src/example_run.py +++ b/cami_src/example_run.py @@ -9,5 +9,5 @@ chdir((sys.argv[0].rsplit('/', 1))[0]) networkfile = "../data/input/networks/example_network.tsv" seedfile = "../data/input/seeds/example_seeds.txt" identifier = "example_run" -command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -d -p --f;' +command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -img -p --f;' subprocess.call(command, shell=True) \ No newline at end of file diff --git a/cami_src/utils/networks.py b/cami_src/utils/networks.py index f0459968947953a04bc07e1538395cb970b4a7ad..e61a812d596abe674c82877550d19b140d6c60e8 100644 --- a/cami_src/utils/networks.py +++ b/cami_src/utils/networks.py @@ -9,6 +9,7 @@ import graph_tool.topology as gtt import graph_tool.util as gtu import itertools as it + def edge_weights(g, base_weigths, hub_penalty, inverse=False): avdeg = gts.vertex_average(g, "total")[0] weights = g.new_edge_property("double", val=avdeg) @@ -124,6 +125,7 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): return g2 + def find_bridges(g): r"""Finds all bridges in a graph.""" global __time @@ -139,6 +141,7 @@ def find_bridges(g): __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge) return is_bridge + def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): visited[node] = True global __time @@ -156,7 +159,7 @@ def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): is_bridge[g.edge(node, nb)] = True except: pass - elif int(nb) != parent[node]: #TODO can in theory be removed + elif int(nb) != parent[node]: # TODO can in theory be removed low[node] = min(low[node], disc[nb]) @@ -164,9 +167,9 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10): if gt.openmp_enabled(): gt.openmp_set_num_threads(6) weights = edge_weights(g, weights, hub_penalty, inverse=True) - scores = defaultdict(lambda:0) + scores = defaultdict(lambda: 0) node_name_attribute = 'name' - seed_map = {g.vertex_properties[node_name_attribute][node] :node for node in seed_ids} + seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids} seed_ids = list(seed_map.keys()) first_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0) num_found_trees = 1 @@ -184,7 +187,7 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10): match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices())) for vertex in tree_nodes: - scores[vertex] +=1 + scores[vertex] += 1 if num_trees > 1: is_bridge = find_bridges(g) edge_filter = g.new_edge_property("boolean", True) @@ -229,8 +232,8 @@ def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10): if num_found_trees >= num_trees: break score_prop = g.new_vertex_property("float") - for v,c in scores.items(): - score_prop[int(v)]=c + for v, c in scores.items(): + score_prop[int(v)] = c return score_prop @@ -246,19 +249,27 @@ def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None): return scores -def betweenness(g, seed_ids, hub_penalty, weights=None): +def betweenness(g, hub_penalty, weights=None): if gt.openmp_enabled(): gt.openmp_set_num_threads(6) weights = edge_weights(g, weights, hub_penalty, inverse=True) - scores = g.new_vertex_property("float") - all_pairs = [(source, target) for source in seed_ids for target in seed_ids if source < target] - for source, target in all_pairs: - local_scores = g.new_vertex_property("float") - num_paths = 0.0 - for path in gtt.all_shortest_paths(g, source, target, weights=weights): - local_scores.a[path[1:-1]] += 1 - num_paths += 1 - if num_paths > 0: - local_scores.a /= num_paths - scores.a += local_scores.a + # Call graph-tool to compute TrustRank. + # trust = g.new_vertex_property("double") + scores, _ = gtc.betweenness(g, weight=weights) + # Compute and return the results. return scores + + +def closeness(g, hub_penalty, weights=None): + if gt.openmp_enabled(): + gt.openmp_set_num_threads(6) + weights = edge_weights(g, weights, hub_penalty, inverse=True) + # Call graph-tool to compute TrustRank. + # trust = g.new_vertex_property("double") + scores = gtc.closeness(g, weight=weights, harmonic=True) + # Compute and return the results. + return scores + + + +