diff --git a/cami_src/algorithms/AlgorithmWrapper.py b/cami_src/algorithms/AlgorithmWrapper.py index 5e0acaf15559b391a51668538473cbef03299a72..6378fade6ebea5dcd882cce7b74063b69c1a463d 100644 --- a/cami_src/algorithms/AlgorithmWrapper.py +++ b/cami_src/algorithms/AlgorithmWrapper.py @@ -16,6 +16,7 @@ class AlgorithmWrapper(object): self.home_path = '' self.config = 'camiconf' self.code = 99 + self.debug = False def set_weight(self, weight): self.weight = weight diff --git a/cami_src/algorithms/DiamondWrapper.py b/cami_src/algorithms/DiamondWrapper.py index d76072ec5f071c977eee470a97dddb53487ee34e..93ca932a20200630a4dd4a5dc6012fcf72afc86a 100644 --- a/cami_src/algorithms/DiamondWrapper.py +++ b/cami_src/algorithms/DiamondWrapper.py @@ -63,7 +63,8 @@ class DiamondWrapper(AlgorithmWrapper): file.write(f"{str(edge.source())},{str(edge.target())}\n") inputparams.append(ppi_file) assert os.path.exists(ppi_file), f'Could create PPI-network file "{ppi_file}"' - print(f'{self.name} ppi is saved in {ppi_file}') + if self.debug: + print(f'{self.name} ppi is saved in {ppi_file}') # create seed file # parse through the seed list and write the ids of the vertices into a file @@ -71,7 +72,8 @@ class DiamondWrapper(AlgorithmWrapper): for seed in self.seeds: file.write(f"{seed}\n") assert os.path.exists(seed_file), f'Could create seed file "{seed_file}"' - print(f'{self.name} seeds are saved in {seed_file}') + if self.debug: + print(f'{self.name} seeds are saved in {seed_file}') inputparams.append(seed_file) # do not predict too much when there are not enough seeds @@ -79,7 +81,8 @@ class DiamondWrapper(AlgorithmWrapper): #MC: #CONFIG pred_factor = 10, max_preds = 100 nof_preds = min([nof_seeds * self.pred_factor, self.max_preds]) - print(f'With {nof_seeds} seeds, {self.name} will try to predict {nof_preds} active modules.') + if self.debug: + print(f'With {nof_seeds} seeds, {self.name} will try to predict {nof_preds} active modules.') inputparams.append(nof_preds) return inputparams diff --git a/cami_src/algorithms/DominoWrapper.py b/cami_src/algorithms/DominoWrapper.py index 0abd3c733d0e64d294c0b195a05e8c18fb107766..d642ecf6f2dccfc371a788dbe428024e2e363584 100644 --- a/cami_src/algorithms/DominoWrapper.py +++ b/cami_src/algorithms/DominoWrapper.py @@ -64,7 +64,8 @@ class DominoWrapper(AlgorithmWrapper): """ inputparams = [] # prepare inputfiles - print(f'creating {self.name} input files in {self.output_dir}') + if self.debug: + print(f'creating {self.name} input files in {self.output_dir}') ppi_filename = self.name_file('ppi', 'sif') ppi_file = os.path.join(self.output_dir, ppi_filename) @@ -87,16 +88,19 @@ class DominoWrapper(AlgorithmWrapper): for seed in self.seeds: file.write(f"{seed}_\n") inputparams.append(seed_file) - print(f'{self.name} seeds are saved in {seed_file}') + if self.debug: + print(f'{self.name} seeds are saved in {seed_file}') slices_filename = self.name_file('slices') slices_output = os.path.join(self.output_dir, slices_filename) if not os.path.exists(slices_output): - print('creating domino slices_file...') + if self.debug: + print('creating domino slices_file...') command = f'slicer --network_file "{ppi_file}" --output_file "{slices_output}"' subprocess.call(command, shell=True, stdout=subprocess.PIPE) - print(f'{self.name} slices are saved in {slices_output}') + if self.debug: + print(f'{self.name} slices are saved in {slices_output}') inputparams.append(slices_output) return inputparams diff --git a/cami_src/algorithms/RobustWrapper.py b/cami_src/algorithms/RobustWrapper.py index 5ecf3c3babdd26fb0e61f92a9534d667cd384452..84d00ef3af6552356e4ae3064a97b2e69d106351 100755 --- a/cami_src/algorithms/RobustWrapper.py +++ b/cami_src/algorithms/RobustWrapper.py @@ -82,12 +82,14 @@ class RobustWrapper(AlgorithmWrapper): for edge in self.ppi_network.edges(): file.write(f"{str(edge.source())}\t{str(edge.target())}\n") inputparams.append(ppi_file) - print(f'{self.name} ppi is saved in {ppi_file}') + if self.debug: + print(f'{self.name} ppi is saved in {ppi_file}') with open(seed_file, "w") as file: for seed in self.seeds: file.write(f"{seed}\n") - print(f'{self.name} seeds are saved in {seed_file}') + if self.debug: + print(f'{self.name} seeds are saved in {seed_file}') inputparams.append(seed_file) return inputparams diff --git a/cami_src/cami.py b/cami_src/cami.py index b64ae6c99f5c89c8883cccb227bf302fd7a2c98f..fa52db54157de44fea2811fe66092169a866cab3 100755 --- a/cami_src/cami.py +++ b/cami_src/cami.py @@ -136,8 +136,9 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, result_sets = cami.take_custom_results(external_input, result_sets) cami.create_consensus(result_sets) - for result in result_sets.keys(): - result_sets[result] = result_sets[result].union(cami.seed_lst) + # adds the seeds to the results, right now result_sets contains the seeds that should be ADDED to the module + # for result in result_sets.keys(): + # result_sets[result] = result_sets[result].union(cami.seed_lst) if nvenn or save_image: print('Sending results to nVenn') @@ -169,12 +170,7 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, if nvenn and vis: url = cami.nvenn() cami.download_diagram(url) - with open('/Users/Mia/cami_local/cami/data/output/explorativeness.tsv', 'a') as f: - make_consensus(vis=True) - seedname = seeds - for tool in cami.result_gene_sets: - f.write(f'\n{seedname}\t{len(cami.seed_lst)}\t{tool}\t{len(cami.result_gene_sets[tool])}') - + with open(f'{output_dir}/00_node_degrees.tsv', 'w') as node_degrees: node_degrees.write('vertex\tout_degree\tin_degree\n') for vertex in cami.ppi_graph.vertices(): @@ -184,7 +180,15 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, base_seeds = cami.origin_seed_lst original_seeds = [cami.ppi_vertex2gene[seed] for seed in base_seeds] print(f'Initializing CAMI and the seed variation by running CAMI with all given seeds:{original_seeds}') + + with open('/Users/Mia/cami_local/cami/data/output/explorativeness.tsv', 'a') as f: + make_consensus(vis=True) + seedname = seeds + for tool in cami.result_gene_sets: + f.write(f'\n{seedname}\t{len(cami.seed_lst)}\t{tool}\t{len(cami.result_gene_sets[tool])}') + #make_consensus(vis=True) + random.seed(50) removal_frac = 0.2 nof_iterations = int(seed_variation) @@ -215,10 +219,11 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, res_table1.write(f'{ident}') # update uid new_identifier = identifier + f'_{ident}' + # reset cami cami.reset_cami(new_uid=new_identifier) # cami.ppi_graph = original_ppi - #remove seeds + #remove seeds (again) print(f'Removing {nof_removals} seeds from the original seed list...') removed_seeds_idx = random.sample(list(range(nof_seeds)), nof_removals) removed_seeds = cami.remove_seeds(removed_seeds_idx) @@ -273,13 +278,19 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, rediscovery_rates_results = [results[0] for results in variation_results] # print(rediscovery_rates_results) tools = [tool for tool in rediscovery_rates_results[0].keys()] + for idx,tool in enumerate(tools): + if '_' in tool: + tmp_lst = tool.split('_') + linebreak_pos = len(tmp_lst)//2 + added_linebreak_lst = tmp_lst[:linebreak_pos] + ['\n'] + tmp_lst[linebreak_pos] + tools[idx] = ''.join(added_linebreak_lst) redisc_rates = [[res[tool] for res in rediscovery_rates_results] for tool in tools] #PLOT # Create a figure instance - plt.figure(figsize=(16,6)) + plt.figure(figsize=(50,8)) # Extract Figure and Axes instance ax1 = plt.subplot(1,2,1, label='ax1') @@ -331,55 +342,6 @@ def main(ppi_network, seeds, tools, tool_weights, consensus, evaluate, plt.savefig(f'{output_dir}/00_{identifier}_seed_variation_result.png') print(f'Violin plot saved under: 00_{identifier}_seed_variation_result.png') - # plot TP Rate - - # Extract Figure and Axes instance - fig2, ax2 = plt.subplots() - - colors = ['red', 'blue', 'black', 'purple'] - legend = [] - # Create a plot - for idx,tool in enumerate(used_tools): - scatter = ax2.scatter(list(range(1,nof_iterations + 1)),tp_rate_dict[tool], color=colors[idx]) - legend.append(scatter) - - plt.legend(legend, - used_tools) - # Add title - ax2.set_title(f'Sensitivity (TP/TP + FN) in {nof_iterations} iterations.', wrap=True) - - ax2.set_xticks(list(range(1,nof_iterations + 1))) - ax2.set_xticklabels([idx if idx%5==0 else '' for idx in range(1,nof_iterations+1)]) - ax2.set_xlabel('Iterations') - ax2.set_ylabel('Sensitivity (TP/TP + FN)') - - # Save the figure - sensitivity_file = f'{output_dir}/00_{identifier}_seed_variation_tp_rates.png' - fig2.savefig(sensitivity_file) - print(f'Sensitivity plot saved under {sensitivity_file}') - - - # plot module size frac - fig3, ax3 = plt.subplots() - legend = [] - for idx,tool in enumerate(used_tools): - scatter = ax3.scatter(list(range(1,nof_iterations + 1)), module_size_dict[tool], color=colors[idx]) - legend.append(scatter) - - plt.legend(legend, - used_tools) - # Add title - ax3.set_title(f'Ratio of number of rediscovered seeds and CAMI module size', wrap=True) - - ax3.set_xticks((list(range(1,nof_iterations + 1)))) - ax3.set_xticklabels([idx if idx%5==0 else '' for idx in range(1,nof_iterations+1)]) - ax3.set_xlabel('Iterations') - ax3.set_ylabel('Module size ratio (<rediscovered seeds>/<module size>)') - - # Save the fig1ure - size_file = f'{output_dir}/00_{identifier}_redisc_modulesize_rate.png' - fig3.savefig(size_file) - print(f'Sensitivity plot saved under {size_file}') if save_temps: print(f'All temporary files were kept in {tmp_dir}') diff --git a/cami_src/cami_suite.py b/cami_src/cami_suite.py index a087e11148355b9e329c6d3bd192ce6942e6e231..3365074782c0d76d8d14dc971705bcc39dd2e78b 100644 --- a/cami_src/cami_suite.py +++ b/cami_src/cami_suite.py @@ -106,7 +106,8 @@ class cami(): :type tmp_dir: str :param home_path: Path to the cami home directory (gitlab repository) :type home_path: str - """ + """ + self.debug = False self.ppi_graph = ppi_graph self.origin_ppi_graph = ppi_graph.copy() self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"] @@ -123,10 +124,11 @@ class cami(): self.tmp_dir = tmp_dir self.nof_tools = len(tool_wrappers) - self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices) + self.result_gene_sets = {} #contains the genes predicted by the tools (not the indices) WITHOUT seeds + self.result_module_sets = {} #contains the genes predicted by the tools (not the indices) WITH seeds + self.cami_module = [] # TODO: pick place where cami_module is set, which consensus approach should we use? self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers} - self.code2toolname[0] = 'CAMI' - self.cami_vertices = [] + self.code2toolname[0] = 'No tool' self.ncbi = False config = ConfigParser() @@ -149,8 +151,11 @@ class cami(): self.tmp_dir = new_tmp_dir self.ppi_graph = self.origin_ppi_graph.copy() self.result_gene_sets = {} + self.result_module_sets = {} self.cami_vertices = [] self.seed_lst = self.origin_seed_lst.copy() + self.code2toolname = {tool.code:tool.name for tool in self.tool_wrappers} + self.code2toolname[0] = 'No tool' def set_initial_seed_lst(self, seedlst): self.initial_seed_lst = seedlst @@ -184,11 +189,10 @@ class cami(): return preds def make_evaluation(self): - print(self.result_gene_sets) biodigest.setup.main(setup_type="api") - for result_set in self.result_gene_sets: + for result_set in self.result_module_sets: validation_results = biodigest.single_validation.single_validation( - tar=set(self.result_gene_sets[result_set]), + tar=set(self.result_module_sets[result_set]), tar_id='entrez', mode='set-set', distance='jaccard', @@ -198,7 +202,7 @@ class cami(): biodigest.single_validation.save_results(validation_results, f'{result_set}_{self.uid}', self.output_dir) biodigest.evaluation.d_utils.plotting_utils.create_plots(results=validation_results, mode='set-set', - tar=set(self.result_gene_sets[result_set]), + tar=set(self.result_module_sets[result_set]), tar_id='entrez', out_dir=self.output_dir, prefix=f'{result_set}_{self.uid}') @@ -284,6 +288,10 @@ class cami(): seed_list = self.seed_lst tool_name_map = self.code2toolname gene_name_map = self.ppi_vertex2gene + + # remove seeds from result sets + for tool in result_sets: + result_sets[tool] -= set(self.seed_lst) camis = { 'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}}, @@ -327,12 +335,17 @@ class cami(): 'hub_penalty': 0, 'damping_factor': 0.7, 'confidence_level': 0.5, 'ranking': 'must', 'trees': 15 }}, } + + # create integer codes for cami_versions (needed for predicted_by vertex property) for cami_method_name, cami_params in camis.items(): print("Running " + cami_method_name) + tool_code = max(list(tool_name_map.keys())) + 1 + tool_name_map[tool_code] = cami_method_name + cami_vertices, putative_vertices, codes2tools = cami_params['function'](result_sets, ppi_graph, seed_list, predicted_by, cami_scores, - tool_name_map, + tool_name_map, tool_code, cami_params['params']) # sort the resulting vertices according to their cami_score @@ -342,23 +355,33 @@ class cami(): # translate the resulting vertex() ids to the corresponding names in the ppi network cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vlist] - print(f'With the given seed genes: {seed_genes} \n' + - f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):') - for vertex in cami_vlist: - print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}') - # for visualization + if self.debug: + print(f'With the given seed genes: {seed_genes} \n' + + f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):') + for vertex in cami_vlist: + print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}') + else: + print(f'With the {len(seed_genes)} seed genes CAMI ({cami_method_name}) proposes {len(seed_genes)} to add to the Active Module') + + # for visualization with nvenn self.result_gene_sets[cami_method_name] = cami_genes - - if cami_method_name == 'cami_v1': - # for drugstone - self.cami_vertices = cami_vlist - + + # transform all vertex indices to their corresponding gene names in a result set + for tool in result_sets: + self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]]) + + # add seeds to result sets for drugstone and digest + for tool in result_sets: + self.result_module_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]]).union(self.seed_lst) + + assert(self.code2toolname == tool_name_map) + # save the results in outputfiles self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, - gene_name_map, codes2tools, result_sets, cami_scores) + gene_name_map, codes2tools, cami_scores) def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes, - gene_name_map, codes2tools, result_sets, cami_scores): + gene_name_map, codes2tools, cami_scores): # save all predictions by all tools print('Saving the results...') with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile: @@ -401,10 +424,7 @@ class cami(): print(f'saved cami output in: {self.output_dir}/CAMI_output_{self.uid}.tsv') print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/CAMI_nodes_{cami_method}_{self.uid}.txt') - # transform all vertex indices to their corresponding gene names in a result set - for tool in result_sets: - self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]]) - + # save predictions by the other tools for tool in self.result_gene_sets: with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile: @@ -412,10 +432,7 @@ class cami(): for gene in self.result_gene_sets[tool]: outputfile.write(f'{gene}\n') print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv') - - # for drugstone - self.cami_vertices = cami_vlist - + # return values consensus = {} consensus['module'] = whole_module @@ -456,7 +473,7 @@ class cami(): def use_drugstone(self): symbol = self.ppi_graph.vertex_properties["symbol"] - cami_module = self.cami_vertices + self.seed_lst + cami_module = self.cami_module cami_symbols = [symbol[vertex] for vertex in cami_module] cami_symbols.append cami_symbol_edges = [] diff --git a/cami_src/consensus/cami_v1.py b/cami_src/consensus/cami_v1.py index acba57398fff0e7a32ad0aa5e6f79ffcb0bf538f..b65e655bff5216e5c7e9e124d1e4900c8b12f308 100644 --- a/cami_src/consensus/cami_v1.py +++ b/cami_src/consensus/cami_v1.py @@ -1,6 +1,6 @@ import sys, os -def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params): +def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): consens_threshold = params['consens_threshold'] # calculate gene weights # set of all result genes @@ -10,8 +10,8 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t # parse every result set of each tool for tool in result_sets: - print(f'{tool.name}: {tool.weight}') - result_sets[tool] -= set(seed_lst) + #print(f'{tool.name}: {tool.weight}') + # TODO: Should we keep the seeds in the result sets? # everytime a tool predicted a gene add 1 * the tool weight to its weight and add it to the result genes for vertex in result_sets[tool]: @@ -22,7 +22,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t vertex] >= consens_threshold: # if a vertex was predicted twice (or once if there is only 1 tool used) add it to the cami set putative_vertices.remove(vertex) cami_vertices.add(vertex) - predicted_by[vertex][0] = 1 + predicted_by[vertex][tool_code] = 1 # TODO: Find alternate ways to calculate CAMI scores => The heavy weights should get +0.5 too? # TODO: Try to rerun cami with varying input seeds? @@ -36,7 +36,7 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t for vertex in putative_vertices: if vertex in neighbors: # if a vertex is in the neighborhood of the heavy vertices increase the cami_score cami_vertices.add(vertex) - predicted_by[vertex][0] = 1 + predicted_by[vertex][tool_code] = 1 cami_scores[vertex] += 0.5 @@ -45,4 +45,4 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for vertex in ppi_graph.vertices()} - return cami_vertices.union(set(seed_lst)), putative_vertices, codes2tools \ No newline at end of file + return cami_vertices, putative_vertices, codes2tools \ No newline at end of file diff --git a/cami_src/consensus/cami_v2.py b/cami_src/consensus/cami_v2.py index bb0b6994f97f31f0f88f2dbffa642da4b8593bb4..f8cba3c137c06c1643b1ed44c135b9062aaea770 100644 --- a/cami_src/consensus/cami_v2.py +++ b/cami_src/consensus/cami_v2.py @@ -7,7 +7,7 @@ from utils.networks import trustrank, betweenness, must # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries # TODO maybe find a smart way to cutoff automatically? -def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params): +def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): damping_factor = params['damping_factor'] hub_penalty = params['hub_penalty'] confidence_level = params.get('confidence_level',0.5) @@ -23,7 +23,6 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t # parse every result set of each tool counts = defaultdict(lambda: 0) for tool in result_sets: - result_sets[tool] -= set(seed_lst) for vertex in result_sets[tool]: putative_vertices.add(vertex) counts[vertex] = counts[vertex] + tool.weight @@ -49,8 +48,9 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t for v in putative_vertices: if scores.a[int(v)] >= threshold and scores.a[int(v)] > 0: cami_vertices.add(v) + predicted_by[v][tool_code] = 1 # translate tool code to string codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for vertex in ppi_graph.vertices()} - return cami_vertices.union(set(seed_lst)), putative_vertices, codes2tools + return cami_vertices, putative_vertices, codes2tools diff --git a/cami_src/consensus/cami_v3.py b/cami_src/consensus/cami_v3.py index 4fdea9756bd518e5d18f63abd9849a8063c81435..aa233fd6dfbbed2cbc8ecf8a7cac9744110bcf3d 100644 --- a/cami_src/consensus/cami_v3.py +++ b/cami_src/consensus/cami_v3.py @@ -5,10 +5,10 @@ import graph_tool as gt # This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries # TODO maybe find a smart way to cutoff automatically? -def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params): +def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, tool_code, params): damping_factor = params['damping_factor'] hub_penalty = params['hub_penalty'] - confidence_levelentage = params['confidence_level'] + confidence_level = params['confidence_level'] weighted = 'weighted' in params and params['weighted'] ranking_method = params['ranking'] if 'ranking' in params else 'trustrank' trees = params.get('trees',5) @@ -22,7 +22,6 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t # parse every result set of each tool counts = defaultdict(lambda: 0) for tool in result_sets: - result_sets[tool] -= set(seed_lst) for vertex in result_sets[tool]: putative_vertices.add(vertex) counts[vertex] = counts[vertex] + tool.weight @@ -55,12 +54,13 @@ def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2t pass putative_scores = list(putative_score_map.values()) putative_scores.sort() - threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_levelentage))] + threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_level))] for v in putative_vertices: if putative_score_map[v] >= threshold and putative_score_map[v] > 0: cami_vertices.add(v) + predicted_by[v][tool_code] = 1 # translate tool code to string codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for vertex in ppi_graph.vertices()} - return cami_vertices.union(set(seed_lst)), putative_vertices, codes2tools + return cami_vertices, putative_vertices, codes2tools diff --git a/cami_src/preprocess.py b/cami_src/preprocess.py index 8cd0a37016386efb9faaa0cb381e34e7db34db4c..2d54ff85d9b3758620b03442c7a653c8615c9db0 100644 --- a/cami_src/preprocess.py +++ b/cami_src/preprocess.py @@ -33,9 +33,8 @@ def csv2graph(inputfile, unseen_vertices -= 1 if unseen_vertices == 0: break - g.vertex_properties["betweenness"], g.edge_properties["betweenness"] = graph_tool.centrality.betweenness(g) g.vertex_properties["cami_score"] = g.new_vertex_property("float", val=0.0) - values = (20) * [-1] + values = (50) * [-1] g.vertex_properties["predicted_by"] = g.new_vertex_property("vector<int16_t>", val=values) return g