Commit 654a8b48 authored by AndiMajore
moved all changes to cami_src

parent 71a82189
Showing 395 additions and 1479 deletions
File moved
from graph_tool import Graph
import os
class AlgorithmWrapper(object):
#TODO: temporary/working directory as a parameter?
"""Abstract wrapper class for the network enrichment algorithms used in the tests."""
def __init__(self):
self.uid = '0000'
self.name = ''
self.weight = 1
self.output_dir = ''
self.ppi_network = Graph()
self.seeds = list()
self.home_path = ''
self.config = 'camiconf'
self.code = 99
def set_weight(self, weight):
self.weight = weight
def set_config(self, config_file):
self.config = config_file
def set_ppi_network(self, graph):
self.ppi_network = graph
def set_seeds(self, seeds):
self.seeds = seeds
def set_id(self, uid):
self.uid = uid
def set_homepath(self, path):
self.home_path = os.path.abspath(path)
def create_tmp_output_dir(self, tmp_dir):
out_dir = os.path.join(tmp_dir, self.name)
if not os.path.exists(out_dir):
os.mkdir(out_dir)
print(f"created temporary directory for {self.name} named {out_dir}...")
self.output_dir = out_dir
def name_file(self, kind, ending='txt'):
return f'{self.name}_{kind}.{ending}'
def run_algorithm(self, inputparams):
"""runs a module identification algorithm
:param inputparams: list of parameters as needed by the algorithm
:type inputparams: list(str)
:return: list of predicted nodes
:rtype: list(str)
"""
pass
def prepare_input(self):
"""prepares the input ppi and seed genes as needed by the algorithm
:return: input parameters for the algorithm
:rtype: list(str)
"""
pass
def extract_output(self, algo_output):
"""return the output as a list of nodes predicted by an algorithm
:param algo_output: path to outputfile as given by the algorithm
:type algo_output: str
:return: list of predicted nodes
:rtype: list(str)
"""
pass
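# Editor's sketch (not from the original source): the lifecycle CAMI expects
# from any AlgorithmWrapper subclass; 'MyWrapper' and the literal values are
# hypothetical placeholders.
#
#   wrapper = MyWrapper()
#   wrapper.set_homepath('.')               # resolved to an absolute path
#   wrapper.set_ppi_network(ppi_graph)      # a graph_tool Graph()
#   wrapper.set_seeds(['4790', '207'])
#   wrapper.create_tmp_output_dir('/tmp')   # creates /tmp/<tool name>/
#   inputparams = wrapper.prepare_input()
#   predictions = wrapper.run_algorithm(inputparams)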
import subprocess, os
#MC:
from configparser import ConfigParser
from algorithms.AlgorithmWrapper import AlgorithmWrapper
class DiamondWrapper(AlgorithmWrapper):
def __init__(self):
super().__init__()
self.name = 'DIAMOnD'
self.code = 1
config = ConfigParser()
config.read(self.config)
self.alpha = int(config.get('diamond', 'alpha'))
self.pred_factor = int(config.get('diamond', 'pred_factor'))
self.max_preds = int(config.get('diamond', 'max_preds'))
def run_algorithm(self, inputparams):
"""Run DIAMOnD algorithm
:param inputparams: list of input parameters, in this case the paths to
the ppi and seed files and the number of nodes to predict
:type inputparams: list(str)
:return: list of predicted nodes
:rtype: list(str)
"""
# Run DIAMOnD.
# path to diamond
diamond_path = os.path.join(self.home_path, 'tools/DIAMOnD/')
diamond = f'cd "{diamond_path}"; python DIAMOnD.py'
ppi = inputparams[0] # path to ppi inputfile
seeds = inputparams[1] # path to seed inputfile
nof_predictions = inputparams[2] # how many active genes should be predicted
out_filename = self.name_file('out') # name the outputfile
algo_output = os.path.join(self.output_dir, out_filename) # specify the output location
#MC:
#CONFIG alpha = 1
command = f'{diamond} "{ppi}" "{seeds}" {nof_predictions} {self.alpha} "{algo_output}"'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
assert os.path.exists(algo_output), f'DIAMOnD failed to save output to {algo_output}'
print(f"DIAMOnD results saved in {algo_output}")
return self.extract_output(algo_output)
def prepare_input(self):
"""prepares the input ppi and seed genes as needed by the algorithm
"""
inputparams = []
# prepare inputfiles
# name ppi and seed file, specify path to files
ppi_filename = self.name_file('ppi')
ppi_file = os.path.join(self.output_dir, ppi_filename)
seed_filename = self.name_file('seeds')
seed_file = os.path.join(self.output_dir, seed_filename)
# create ppi file
with open(ppi_file, "w") as file:
# parse through the ppi graph and write the ids of the vertices into a file
for edge in self.ppi_network.edges():
file.write(f"{str(edge.source())},{str(edge.target())}\n")
inputparams.append(ppi_file)
assert os.path.exists(ppi_file), f'Could not create PPI-network file "{ppi_file}"'
print(f'{self.name} ppi is saved in {ppi_file}')
# create seed file
# parse through the seed list and write the ids of the vertices into a file
with open(seed_file, "w") as file:
for seed in self.seeds:
file.write(f"{seed}\n")
assert os.path.exists(seed_file), f'Could not create seed file "{seed_file}"'
print(f'{self.name} seeds are saved in {seed_file}')
inputparams.append(seed_file)
# do not predict too much when there are not enough seeds
nof_seeds = len(self.seeds)
#MC:
#CONFIG pred_factor = 10, max_preds = 100
nof_preds = min([nof_seeds * self.pred_factor, self.max_preds])
print(f'With {nof_seeds} seeds, {self.name} will try to predict {nof_preds} active module nodes.')
inputparams.append(nof_preds)
return inputparams
def extract_output(self, algo_output):
"""extract the results from an outputfile to a list indices
:param algo_output: path to outputfile
:type algo_output: str
:return: list of indicesb
:rtype: list(int)
"""
nodes = []
with open(algo_output, "r") as output:
for line in output.readlines()[1:]:
nodes.append(int(line.split("\t")[1]))
return nodes
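# Illustration (an assumption inferred from extract_output() above, not from
# the DIAMOnD documentation): the parser skips one header line and reads the
# node id from the second tab-separated column, so a file like
#
#   rank    DIAMOnD_node
#   1       4790
#   2       207
#
# would yield [4790, 207].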
from algorithms.AlgorithmWrapper import AlgorithmWrapper
import subprocess
from configparser import ConfigParser
class TemplateWrapper(AlgorithmWrapper):
def __init__(self):
"""Each tool needs certain predefined instance variables:
The only instance variables that need to be defined for each tool individually are:
- name (string): The name of the algorithm/tool
- code (int): A unique integer code for this tool. Choose any number (<21) that is not taken by other tools yet (currently taken are: 0,1,2,3)
- any constant numbers or variables that are defined in the config file for this tool
The following variables are inherited from the AlgorithmWrapper Super Class:
- UID (string)
- weight (int)
- output directory (string)
- ppi_network (Graph())
- seeds (list())
- home_path (string)
- config (string)
There is no need to redefine these variables when introducing a new algorithm.
"""
super().__init__()
self.name = '<tool_name>'
self.code = 4
config = ConfigParser()
config.read(self.config)
# self.nof_predictions = int(config.get('<tool_name>', 'nof_predictions'))
def run_algorithm(self, inputparams):
"""Execute the algorithm. Ideally the algorithm is a command line tool that takes
a single command for execution. Use the inputparams created by prepare_input()
accordingly and call the command using the subprocess library. For example:
command = f'{<algorithm>} {inputparams[0]} {inputparams[2]} {additional_parameters}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
If the algorithm needs multiple steps, you can conduct them here too. The only important
thing is that this function returns the extracted output from the outputfile created by
the algorithm.
Hint: Execute the algorithm and specify the outputfile name. From that outputfile
extract the predicted Active Module by using extract_output.
Args:
inputparams (list): A list of parameters for the algorithm that is defined via
prepare_input()
Returns:
list: A list of resulting genes extracted from the generated output file
"""
# example variables:
tool = 'example tool'
ppi = inputparams[0] # path to ppi inputfile
seeds = inputparams[1] # path to seed inputfile
out_filename = self.name_file('out') # name the outputfile
algo_output = f'{self.output_dir}/{out_filename}' # specify the output location
# Conduct the algorithm
command = f'{tool} {ppi} {seeds}'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
print(f"{tool} results saved in {algo_output}")
return self.extract_output(algo_output)
def prepare_input(self):
"""prepares the input ppi and seed genes as needed by the algorithm
generally, this function will write two new inputfiles as needed by the algorithm:
- a file with the seeds in the needed format
- a file with the ppi network in the needed format
If needed create more input files.
Hint: Save the input files into the temporary folder of the algorithm.
To access the path to the temporary folder use: self.output_dir
Returns:
list: a list of the paths to the created input files
"""
inputparams = []
# name ppi and seed file, specify paths to files
ppi_filename = self.name_file('ppi')
ppi_file = f'{self.output_dir}/{ppi_filename}'
seed_filename = self.name_file('seeds')
seed_file = f'{self.output_dir}/{seed_filename}'
# create ppi file
with open(ppi_file, "w") as file:
# parse through the ppi graph and write the ids of the vertices into a file
for edge in self.ppi_network.edges():
file.write(f"{str(edge.source())},{str(edge.target())}\n")
# add the location of the ppi network file to the list of inputparameters
inputparams.append(ppi_file)
print(f'{self.name} ppi network is saved in {ppi_file}')
# create seed file
with open(seed_file, "w") as file:
# parse through the seed list and write the ids of the vertices into a file
for seed in self.seeds:
file.write(f"{seed}\n")
# add the location of the seed file to the list of inputparameters
print(f'{self.name} seeds are saved in {seed_file}')
inputparams.append(seed_file)
return inputparams
def extract_output(self, algo_output):
"""extracts a resulting disease module from an outputfile and
transforms them into a list of vertices in the PPI network.
This list is handed back to the main CAMI suite for further
processing.
Hint: CAMI uses the indices in the PPI network to reference
the input genes i.e. the genes in the list do not correspond
to the genes in the input files.
:param algo_output: path to outputfile
:type algo_output: str
:return: list of predicted genes
:rtype: list(int)
"""
This diff is collapsed.
import threading
from utils import degradome, drugstone, ncbi
from consensus import cami_v1, cami_v2, cami_v3
def list_combinations(lst, k):
"""creates all possible combinations of length k with two objects in a list
:param lst: a list with length 2
:type lst: list()
:param k: length of the combinations
:type k: int
"""
nof_combs = int(2 ** k)
l = int(nof_combs / 2)
columns = []
while l >= 1:
column = []
while len(column) < nof_combs:
for _ in range(l):
column.append(lst[0])
for _ in range(l):
column.append(lst[1])
columns.append(column)
l //= 2
combs = [tuple([column[i] for column in columns]) for i in range(nof_combs)]
assert len(set(combs)) == nof_combs
return combs
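# Worked example (editor's sketch): the columns built above form a truth
# table over the two entries of lst:
#
#   >>> list_combinations(['a', 'b'], 2)
#   [('a', 'a'), ('a', 'b'), ('b', 'a'), ('b', 'b')]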
class cami():
""" A module that is used for Active Module identifaction based on a
consensus approach
"""
def __init__(self, ppi_graph, seed_lst, tool_wrappers, output_dir, uid, tmp_dir, home_path, config, seed_score,
parallelization):
"""Instance variables of CAMI
:param ppi_graph: The PPI graph on which all predictions in CAMI are based
:type ppi_graph: Graph()
:param seed_lst: A list of vertices that are the seeds for the predictions
:type seed_lst: list(Graph().vertex())
:param tool_wrappers: A list of AlgorithmWrappers() that correspond to the tools used for the predictions
:type tool_wrappers: list(AlgorithmWrapper())
:param output_dir: The path to the directory where the results are supposed to be saved
:type output_dir: str
:param uid: Identifier for the current execution of CAMI
:type uid: str
:param tmp_dir: Directory where temporary files should be saved
:type tmp_dir: str
:param home_path: Path to the cami home directory (gitlab repository)
:type home_path: str
"""
self.ppi_graph = ppi_graph
self.origin_ppi_graph = ppi_graph.copy()
self.ppi_vertex2gene = self.ppi_graph.vertex_properties["name"]
self.ppi_gene2vertex = {self.ppi_vertex2gene[vertex]: vertex for vertex in self.ppi_graph.vertices()}
self.initial_seed_lst = None
self.seed_lst = seed_lst
self.origin_seed_lst = seed_lst.copy()
self.tool_wrappers = tool_wrappers
self.output_dir = output_dir
self.tmp_dir = tmp_dir
self.uid = str(uid)
self.nof_tools = len(tool_wrappers)
self.result_gene_sets = {} # contains the genes predicted by the tools (not the indices)
self.code2toolname = {tool.code: tool.name for tool in self.tool_wrappers}
self.code2toolname[0] = 'CAMI'
self.home_path = home_path
self.cami_vertices = []
self.ncbi = False
self.config = config
self.seed_score = seed_score
self.threaded = parallelization
# set weights for seed genes in ppi_graph
for seed in self.seed_lst:
self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score
def reset_cami(self, new_uid=''):
self.uid = new_uid
self.ppi_graph = self.origin_ppi_graph.copy()
self.result_gene_sets = {}
self.cami_vertices = []
self.seed_lst = self.origin_seed_lst.copy()
def set_initial_seed_lst(self, seedlst):
self.initial_seed_lst = seedlst
def initialize_tool(self, tool):
tool.set_ppi_network(self.ppi_graph)
tool.set_seeds(self.seed_lst)
tool.set_homepath(self.home_path)
tool.set_id(self.uid)
tool.set_config(self.config)
def initialize_all_tools(self):
for tool in self.tool_wrappers:
self.initialize_tool(tool)
def run_tool(self, tool):
"""Excecute the predictions using the AlgorithmWrapper() of a tool
:param tool: A tool that has the following methods: prepare_input, run_tool() and extract_output()
:type tool: AlgorithmWrapper()
:return: A set of predicted vertices by the used tool
:rtype: set()
"""
tool.create_tmp_output_dir(self.tmp_dir) # creates the temporary output directory of the tool
print(f"preparing {tool.name} input...")
inputparams = tool.prepare_input()
print(f'running {tool.name}...')
preds = set(tool.run_algorithm(inputparams))
print(f'{tool.name} predicted {len(preds)} active vertices (seeds not excluded):')
print(preds)
return preds
def make_evaluation(self):
print('Evaluation not implemented yet.')
def run_threaded_tool(self, tool, pred_sets):
"""run a tool in one thread and save the results into a dictionary pred_sets
Args:
tool (AlgorithmWrapper): Wrapper class for a tool
pred_sets (dict): a dictionary that maps a tool to its result set
"""
preds = self.run_tool(tool)
pred_sets[tool] = preds # - seed_set
def make_predictions(self):
"""create all predictions using the tools specified in tool_wrappers
:return: A dictionary that saves the predicted vertices with respect
to the corresponding tool
:rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
"""
print(f'Creating result sets of all {self.nof_tools} tools...')
pred_sets = {tool: None for tool in self.tool_wrappers}
if self.threaded:
threads = [threading.Thread(target=self.run_threaded_tool, args=(tool, pred_sets,))
for tool in self.tool_wrappers]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
else:
for tool in self.tool_wrappers:
pred_sets[tool] = self.run_tool(tool)
assert (list(pred_sets.values()).count(None) < 1)
result_sets = {tool: set([self.ppi_graph.vertex(idx) for idx in pred_sets[tool]])
for tool in pred_sets}
return result_sets
def take_custom_results(self, inputfiles, result_sets=None):
"""Takes a list of inputfiles and extracts the results from them to
include them in the consensus with the tools of CAMI
:param inputfiles: A list of dictionaries with the following properties:
key: The used tool name
values: the paths to result files of these tools
:type inputfiles: list(dict)
:return: A dictionary that saves the predicted vertices with respect
to the corresponding tool
:rtype: dict(AlgorithmWrapper():set(Graph.vertex()))
"""
if result_sets is None:
result_sets = {}
for tool in inputfiles:
result_list = []
with open(inputfiles[tool]) as rfile:
for idx, line in enumerate(rfile):
if idx == 0:
tool.name = line.strip()
self.code2toolname[tool.code] = tool.name
else:
node = line.strip()
if node in self.ppi_gene2vertex:
result_list.append(self.ppi_gene2vertex[node])
result_sets[tool] = set(result_list)
return result_sets
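# Expected layout of a custom result file (derived from the parsing above):
# the first line names the tool, every following line holds one gene name
# that is looked up in the PPI network, e.g.
#
#   MyTool
#   CDK2
#   TP53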
def create_consensus(self, result_sets):
"""takes a set of active module predictions and creates a consensus
that combines all the results of the different tools.
:param result_sets: A dictionary with the following properties:
key: The used tool as AlgorithmWrapper() Object
values: Set of vertices in the ppi_graph that were
predicted by the key-tool
:type result_sets: {AlgorithmWrapper(): {Graph().vertex()}}
"""
cami_scores = self.ppi_graph.vertex_properties["cami_score"]
predicted_by = self.ppi_graph.vertex_properties["predicted_by"]
consens_threshold = min(self.nof_tools, 2)
ppi_graph = self.ppi_graph
seed_list = self.seed_lst
tool_name_map = self.code2toolname
gene_name_map = self.ppi_vertex2gene
camis = {
'cami_v1': {'function': cami_v1.run_cami, 'params': {'consens_threshold': consens_threshold}},
'cami_v2_param1_tr': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc':0.75
}},
'cami_v2_param1_bc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc': 0.75, 'ranking':'betweenness'
}},
'cami_v2_param1_m': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc': 0.75, 'ranking': 'must'
}},
'cami_v2_param2_tr': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc':0.5
}},
'cami_v2_param2_m': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking': 'must',
}},
'cami_v2_param2_m_all': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking': 'must', 'all_nodes':True
}},
'cami_v2_param2_bc': {'function': cami_v2.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking':'betweenness'
}},
'cami_v3_param1_tr': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc': 0.75
}},
'cami_v3_param1_bc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc': 0.75, 'ranking':'betweenness'
}},
'cami_v3_param1_m': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc': 0.75, 'ranking': 'must'
}},
'cami_v3_param1_m_all': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0.8, 'damping_factor': 0.5, 'inclusion_perc': 0.75, 'ranking': 'must', 'all_nodes':True
}},
'cami_v3_param2_tr': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc':0.5, 'ranking':'trustrank'
}},
'cami_v3_param2_bc': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking': 'betweenness'
}},
'cami_v3_param2_m': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking': 'must'
}},
'cami_v3_param2_m_all': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking': 'must', 'all_nodes':True
}},
'cami_v3_param3_m': {'function': cami_v3.run_cami, 'params': {
'hub_penalty': 0, 'damping_factor': 0.7, 'inclusion_perc': 0.5, 'ranking': 'must', 'trees':15
}},
}
for cami_method_name, cami_params in camis.items():
print("Running "+cami_method_name)
cami_vertices, putative_vertices, codes2tools = cami_params['function'](result_sets, ppi_graph, seed_list,
predicted_by, cami_scores,
tool_name_map,
cami_params['params'])
# sort the resulting vertices according to their cami_score
cami_vlist = sorted(cami_vertices, key=lambda v: cami_scores[v], reverse=True)
seed_genes = [self.ppi_vertex2gene[seed_vertex] for seed_vertex in seed_list]
# translate the resulting vertex() ids to the corresponding names in the ppi network
cami_genes = [self.ppi_vertex2gene[cami_vertex] for cami_vertex in cami_vlist]
print(f'With the given seed genes: {seed_genes} \n' +
f'CAMI ({cami_method_name}) proposes the following genes to add to the Active Module (sorted by CAMI Score):')
for vertex in cami_vlist:
print(f'{gene_name_map[vertex]}\t{cami_scores[vertex]}\t{codes2tools[vertex]}')
# for visualization
self.result_gene_sets[cami_method_name] = cami_genes
if cami_method_name == 'cami_v1':
# for drugstone
self.cami_vertices = cami_vlist
# save the results in outputfiles
self.generate_output(cami_method_name, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, result_sets, cami_scores)
def generate_output(self, cami_method, seed_genes, cami_vlist, cami_vertices, putative_vertices, cami_genes,
gene_name_map, codes2tools, result_sets, cami_scores):
print('Saving the results...')
with open(f'{self.output_dir}/all_predictions_{self.uid}.tsv', 'w') as outputfile:
outputfile.write(
f'CAMI predictions with {len(self.seed_lst)} of initially {len(self.initial_seed_lst)} seeds: {seed_genes},\n' +
f'initially: {self.initial_seed_lst}\n')
outputfile.write(f'gene\tpredicted_by\tcami_score\tindex_in_graph\tdegree_in_graph\n')
all_vertices = cami_vertices.union(putative_vertices)
for vertex in all_vertices:
outputfile.write(
f'{gene_name_map[vertex]}\t{codes2tools[vertex]}\t{cami_scores[vertex]}\t{str(vertex)}\t{vertex.out_degree()}\n')
print(f'saved all predictions by the used tools in: {self.output_dir}/all_predictions_{self.uid}.tsv')
# save the predictions made by cami
ncbi_url = ('\tncbi_url' if self.ncbi else '')
ncbi_summary = ('\tncbi_summary' if self.ncbi else '')
with open(f'{self.output_dir}/CAMI_output_{self.uid}.tsv', 'w') as outputfile:
outputfile.write(f'gene\tindex_in_graph\tcami_score\tdegree_in_graph{ncbi_url}{ncbi_summary}\n')
for vertex in cami_vlist:
if self.ncbi:
url, summary = ncbi.send_request(gene_name_map[vertex])
url = '\t' + url
if summary is not None:
summary = '\t' + summary
else:
summary = ''
else:
url, summary = '', ''
outputfile.write(
f'{gene_name_map[vertex]}\t{str(vertex)}\t{cami_scores[vertex]}\t{vertex.out_degree()}{url}{summary}\n')
# save the whole module
with open(f'{self.output_dir}/CAMI_module_{cami_method}_{self.uid}.txt', 'w') as modfile:
for vertex in seed_genes:
modfile.write(f'{vertex}\n')
for vertex in cami_genes:
modfile.write(f'{vertex}\n')
print(f'saved cami output in: {self.output_dir}/CAMI_output_{self.uid}.tsv')
print(f'saved the Consensus Active Module by CAMI in: {self.output_dir}/CAMI_module_{cami_method}_{self.uid}.txt')
# transform all vertex indices to their corresponding gene names in a result set
for tool in result_sets:
self.result_gene_sets[tool.name] = set([gene_name_map[vertex] for vertex in result_sets[tool]])
# save predictions by the other tools
for tool in self.result_gene_sets:
with open(f'{self.output_dir}/{tool}_output_{self.uid}.tsv', 'w') as outputfile:
outputfile.write('gene\n')
for gene in self.result_gene_sets[tool]:
outputfile.write(f'{gene}\n')
print(f'saved {tool} output in: {self.output_dir}/{tool}_output_{self.uid}.tsv')
def use_nvenn(self):
"""Create Venn Diagrams via a external tool named degradome.
Sends a request via requests to the degradome server.
Returns the URL of the result.
"""
# visualize with degradome
if self.nof_tools < 7:
print('Visualizing results using Degradome...')
degradome_sets = {tool: self.result_gene_sets[tool]
for tool in self.result_gene_sets
if len(self.result_gene_sets[tool]) > 0}
url = degradome.send_request(degradome_sets)
with open(f'{self.output_dir}/venn_link_{self.uid}.txt', 'w') as f:
f.write(url)
return url
# elif nof_tools == 6:
# print('Visualizing using Degradome...(seeds excluded from results)')
# # degradome_sets = result_sets.copy()
# # degradome_sets['CAMI'] = set(result_genes)
# url = degradome.send_request(degradome_sets)
# webbrowser.open(url)
else:
print('Cannot use degradome to create venn diagrams of more than 6 tools')
def download_diagram(self, url):
venn_name = f'{self.output_dir}/vdiagram_{self.uid}'
response = degradome.download_image(url, venn_name + '.png')
if response is not None:
with open(f'{venn_name}.html', 'w') as r:
r.write(response.html.html)
def use_drugstone(self):
symbol = self.ppi_graph.vertex_properties["symbol"]
cami_module = self.cami_vertices + self.seed_lst
cami_symbols = [symbol[vertex] for vertex in cami_module]
cami_symbol_edges = []
for vertex in self.cami_vertices:
for edge in vertex.all_edges():
cami_symbol_edges.append((symbol[edge.source()], symbol[edge.target()]))
# print(list(set(cami_symbol_edges)))
url = drugstone.send_request(cami_symbols, cami_symbol_edges)
print(f'You can find a network visualization of the CAMI module via: {url}')
print('The link was also saved in the outputfolder for later.')
with open(f'{self.output_dir}/drugstone_link_{self.uid}.txt', 'w') as f:
f.write(url)
return url
def remove_seeds(self, idx_lst):
"""remove seeds at indices idx
Args:
idx_lst (lst): list of indices to be removed
"""
removed_seeds = [self.seed_lst[idx] for idx in idx_lst]
self.seed_lst = [seed for seed in self.seed_lst if seed not in removed_seeds]
for seed in self.seed_lst:
self.ppi_graph.vertex_properties["cami_score"][seed] = self.seed_score
for seed in removed_seeds:
self.ppi_graph.vertex_properties["cami_score"][seed] = 0.0
return removed_seeds
# ========================================
# FileName: camiconf
# Date: 21:00 10.April.2022
# Author: Marcos Chow Castro
# Email: mctechnology170318@gmail.com
# GitHub: https://github.com/mctechnology17
# Brief: main configuration for cami
# =========================================
[domino]
visualization_flag = False
output_name = 'modules.out'
para = 1
c = 'false'
[diamond]
alpha : 1
pred_factor : 2
max_preds : 100
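# Note (editor's aside): Python's ConfigParser treats ':' and '=' as
# interchangeable key/value delimiters, which is why this section can use
# colons while the surrounding sections use '='.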
[robust]
initial_fraction = 0.25
reduction_factor = 0.9
number_steiner_trees = 30
threshold = 0.1
[cami]
seed_score = 10.0
# vim: set fdm=marker:
import sys
from collections import defaultdict
import graph_tool as gt
from utils.networks import trustrank, betweenness, must
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
damping_factor = params['damping_factor']
hub_penalty = params['hub_penalty']
inclusion_percentage = params['inclusion_perc']
weighted = 'weighted' in params and params['weighted']
ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
trees = params.get('trees', 5)
all_nodes = params.get('all_nodes', False)
# calculate gene weights
# set of all result genes
cami_vertices = set()
putative_vertices = set()
# CONFIG: consensus_threshold = 2
# parse every result set of each tool
counts = defaultdict(lambda: 0)
for tool in result_sets:
result_sets[tool] -= set(seed_lst)
for vertex in result_sets[tool]:
putative_vertices.add(vertex)
counts[vertex] = counts[vertex] + tool.weight
for vertex in seed_lst:
counts[vertex] = counts[vertex] + tool.weight
subnet = gt.GraphView(ppi_graph, vfilt=lambda v: v in putative_vertices or v in seed_lst)
weights = None
if weighted:
weights = subnet.new_edge_property("double")
for v, c in counts.items():
weights.a[int(v)] = c
if ranking_method == 'trustrank':
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
elif ranking_method == 'betweenness':
scores = betweenness(subnet, seed_lst, hub_penalty, weights)
elif ranking_method == 'must':
# 'trees' is passed as num_trees, matching must(g, seed_ids, num_trees, ...)
if all_nodes:
scores = must(subnet, set(seed_lst).union(putative_vertices), trees, hub_penalty, weights)
else:
scores = must(subnet, seed_lst, trees, hub_penalty, weights)
putative_scores = scores.a[[int(id) for id in putative_vertices]]
putative_scores.sort()
threshold = putative_scores[int(len(putative_vertices) * (1 - inclusion_percentage))]
for v in putative_vertices:
if scores[v] > threshold:
cami_vertices.add(v)
# translate tool code to string
codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
vertex in ppi_graph.vertices()}
return cami_vertices, putative_vertices, codes2tools
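# Threshold illustration (editor's sketch): with 8 putative vertices and
# inclusion_perc = 0.75, the cutoff is putative_scores[int(8 * 0.25)], i.e.
# the third-lowest score; only vertices scoring strictly above it (roughly
# the top 75%) enter the consensus.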
import sys
from collections import defaultdict
from utils.networks import trustrank, betweenness, must
import graph_tool as gt
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
damping_factor = params['damping_factor']
hub_penalty = params['hub_penalty']
inclusion_percentage = params['inclusion_perc']
weighted = 'weighted' in params and params['weighted']
ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
trees = params.get('trees',5)
all_nodes = params.get('all_nodes',False)
tolerance = params.get('tolerance',10)
# calculate gene weights
# set of all result genes
cami_vertices = set()
putative_vertices = set()
# CONFIG: consensus_threshold = 2
# parse every result set of each tool
counts = defaultdict(lambda: 0)
for tool in result_sets:
result_sets[tool] -= set(seed_lst)
for vertex in result_sets[tool]:
putative_vertices.add(vertex)
counts[vertex] = counts[vertex] + tool.weight
for vertex in seed_lst:
counts[vertex] = counts[vertex] + tool.weight
tool_scores = dict()
for tool in result_sets:
subnet = gt.GraphView(ppi_graph, vfilt=lambda v: v in result_sets[tool] or v in seed_lst)
weights = None
if weighted:
weights = subnet.new_edge_property("double")
for v, c in counts.items():
weights.a[int(v)] = c
if ranking_method == 'trustrank':
scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
elif ranking_method == 'betweenness':
scores = betweenness(subnet, seed_lst, hub_penalty, weights)
elif ranking_method == 'must':
if all_nodes:
scores = must(ppi_graph, set(seed_lst).union(putative_vertices), trees, hub_penalty, weights, tolerance)
else:
scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance)
tool_scores[tool] = scores
putative_score_map = defaultdict(lambda: 0)
for _, scores in tool_scores.items():
for id in putative_vertices:
try:
putative_score_map[id] += scores.a[int(id)]
except:
pass
putative_scores = list(putative_score_map.values())
putative_scores.sort()
threshold = putative_scores[int(len(putative_vertices) * (1 - inclusion_percentage))]
for v in putative_vertices:
if putative_score_map[v] > threshold:
cami_vertices.add(v)
# translate tool code to string
codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
vertex in ppi_graph.vertices()}
return cami_vertices, putative_vertices, codes2tools
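# Note (editor's aside): unlike the variant above, which ranks one combined
# subnetwork of all putative vertices, this version builds a per-tool
# GraphView restricted to that tool's result set, ranks each one separately,
# and sums the per-tool scores per putative vertex before applying the same
# inclusion-percentage cutoff.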
#!/usr/bin/env python3
import sys
from os.path import basename
import subprocess, random
import matplotlib.pyplot as plt
network = sys.argv[1]
seedfiles = sys.argv[2:]
config = 'seed_variationconf'
for seeds in seedfiles:
identifier = basename(seeds).rsplit('.')[0]
command = f'./cami.py -n {network} -s {seeds} -id {identifier} -conf {config} -var 50 -f -v;'
subprocess.call(command, shell=True)
\ No newline at end of file
# ========================================
# FileName: camiconf
# Date: 21:00 10.April.2022
# Author: Marcos Chow Castro
# Email: mctechnology170318@gmail.com
# GitHub: https://github.com/mctechnology17
# Brief: main configuration for cami
# =========================================
[domino]
visualization_flag = False
output_name = 'modules.out'
[diamond]
alpha : 1
pred_factor : 2
max_preds : 100
[robust]
initial_fraction = 0.25
reduction_factor = 0.9
number_steiner_trees = 30
threshold = 0.1
[cami]
seed_score = 10.0
# vim: set fdm=marker:
#!/usr/bin/env python3
import sys
from os import chdir
from os.path import dirname, abspath
import subprocess
chdir(dirname(abspath(sys.argv[0])))
networkfile = "../data/input/networks/example_network.tsv"
seedfile = "../data/input/seeds/example_seeds.txt"
identifier = "example_run"
command = f'./cami.py -n {networkfile} -s {seedfile} -id {identifier} -d -img -p --f;'
subprocess.call(command, shell=True)
\ No newline at end of file
import graph_tool
def csv2graph(inputfile,
symbol_columns,
delimiter="\t", nof_tools = 4):
"""transforms a csv-file to a Graph() object
:param inputfile: inputfile in csv-format
:type inputfile: str
:param delimiter: delimiter in input csv, defaults to "\t"
:type delimiter: str, optional
:return: Graph() object
:rtype: Graph()
"""
print('Creating the PPI network graph and seed list...')
g = graph_tool.load_graph_from_csv(inputfile, skip_first=True,
csv_options={'delimiter': delimiter, 'quotechar': '"'})
if symbol_columns is not None:
if len(symbol_columns) == 0:
symbol_columns = ['Official_Symbol_Interactor_A', 'Official_Symbol_Interactor_B']
symbol_columns = tuple(symbol_columns)
g.vertex_properties["symbol"] = g.new_vertex_property('string', val='')
symbol_s, symbol_t = symbol_columns
unseen_vertices = g.num_vertices()
for s,t,sym_s,sym_t in g.iter_edges(eprops=[g.edge_properties[symbol_s], g.edge_properties[symbol_t]]):
if g.vertex_properties["symbol"][s] == '':
g.vertex_properties["symbol"][s] = sym_s
unseen_vertices -= 1
if g.vertex_properties["symbol"][t] == '':
g.vertex_properties["symbol"][t] = sym_t
unseen_vertices -= 1
if unseen_vertices == 0:
break
g.vertex_properties["cami_score"] = g.new_vertex_property("float", val=0.0)
values = 20 * [-1]
g.vertex_properties["predicted_by"] = g.new_vertex_property("vector<int16_t>", val=values)
return g
def txt2lst(seed_file):
"""transforms a \n delimitered textfile of seeds to a list of strings
:param seed_file: path to the desired input file where each line contains
one seed gene, if there are multiple columns, the node ID
has to be in the first line and the values need to be tab
separated
:type seed_file: str
:return: list of seeds
:rtype: lst(str)
"""
seeds = []
with open(seed_file) as file:
for line in file.readlines()[1:]:
seeds.append(line.split("\t")[0].strip())
return seeds
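# Example input accepted by txt2lst (editor's sketch): the header line is
# skipped and only the first tab-separated column is kept, so
#
#   gene_id<TAB>score
#   4790<TAB>0.9
#   207<TAB>0.7
#
# yields ['4790', '207'].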
def index2name_dict(g):
"""translates an index of a vertex to its corresponding name in a given
Graph() object
:param g: graph_tool object Graph()
:type g: Graph()
"""
return g.vertex_properties["name"]
def name2index_dict(g):
"""creates a dictionary which translates a name of a vertex to its
corresponding index in a given Graph() object
:param g: graph_tool object Graph()
:type g: Graph()
"""
index2name = g.vertex_properties["name"]
return {index2name[v]:v for v in g.vertices()}
# TODO: review this function, where is it used?
def compare_seeds_and_network(ppi_network, seeds):
# check every seed against the vertex names of the PPI network
name2index = name2index_dict(ppi_network)
seeds_in_network = []
seeds_not_in_network = []
for seed in seeds:
if seed in name2index:
seeds_in_network.append(seed)
else:
seeds_not_in_network.append(seed)
if len(seeds_not_in_network) > 0:
print(f'Warning: {len(seeds_not_in_network)} of {len(seeds)} seeds are not in the input PPI-network: {seeds_not_in_network}')
return seeds_in_network
\ No newline at end of file
#!/usr/bin/env python3
import sys
from os.path import basename
import subprocess
print(sys.argv)
network = sys.argv[1]
seedfiles = sys.argv[2:]
for seeds in seedfiles:
identifier = basename(seeds).rsplit('.')[0]
command = f'./cami.py -n {network} -s {seeds} -id {identifier} -img -f -ncbi -d;'
subprocess.call(command, shell=True)
\ No newline at end of file
import requests
def send_request(nodes, edges):
url = 'https://api.drugst.one/create_network'
node_lst=[{'id':symbol} for symbol in nodes]
edge_lst = [{'from':edge[0], 'to':edge[1]} for edge in edges]
data = {'network': {'nodes' : node_lst,
'edges' : edge_lst
}
}
r = requests.post(url, json=data)
id = r.text.strip('"')
result_url = f'https://drugst.one?id={id}'
get_r = requests.get(result_url)
return get_r.url
if __name__ == '__main__':
send_request(['ABCD', 'EFGH', 'IJKL', 'MNOP'], [('ABCD', 'EFGH'),
('IJKL', 'EFGH'), ('MNOP', 'ABCD')])
import sys
from collections import defaultdict
import graph_tool as gt
import graph_tool.centrality as gtc
import graph_tool.stats as gts
import graph_tool.topology as gtt
import graph_tool.util as gtu
import itertools as it
def edge_weights(g, base_weights, hub_penalty, inverse=False):
avdeg = gts.vertex_average(g, "total")[0]
weights = g.new_edge_property("double", val=avdeg)
if base_weights is not None:
for v in g.vertices():
weights.a[int(v)] = base_weights.a[int(v)]
if hub_penalty <= 0:
return weights
if hub_penalty > 1:
raise ValueError("Invalid hub penalty {}.".format(hub_penalty))
for e in g.edges():
edge_avdeg = float(e.source().out_degree() + e.target().out_degree()) / 2.0
penalized_weight = (1.0 - hub_penalty) * avdeg + hub_penalty * edge_avdeg
if inverse:
weights[e] = 1.0 / penalized_weight
else:
weights[e] = penalized_weight
return weights
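# Arithmetic illustration (editor's sketch): with hub_penalty = 0.8, a
# graph-wide average degree of 4 and an edge whose endpoint degrees average
# 50, the penalized weight is 0.2 * 4 + 0.8 * 50 = 40.8; with inverse=True
# the stored value is 1 / 40.8.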
def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
node_name_attribute = "name" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
mc = gt.Graph(directed=False)
eprop_dist = mc.new_edge_property("int")
mc.ep['dist'] = eprop_dist
vprop_name = mc.new_vertex_property("string")
mc.vp[node_name_attribute] = vprop_name
eprop_path = mc.new_edge_property("object")
mc.ep['path'] = eprop_path
mc_vertex_map = dict()
mc_id_map = dict()
for i in range(len(seeds)):
vert = mc.add_vertex()
vprop_name[i] = seeds[i]
mc_vertex_map[seeds[i]] = vert
mc_id_map[vert] = i
for u, v in it.combinations(seeds, 2):
_, elist = gtt.shortest_path(g, g.vertex(seed_map[u]), g.vertex(seed_map[v]), weights=weights,
negative_weights=False, pred_map=None, dag=False)
e = mc.add_edge(mc_vertex_map[u], mc_vertex_map[v])
eprop_dist[e] = len(elist)
mc.ep.path[e] = list(elist)
mst = gtt.min_spanning_tree(mc, weights=eprop_dist, root=None, tree_map=None)
mc.set_edge_filter(mst)
g2 = gt.Graph(directed=False)
vprop_name = g2.new_vertex_property("string")
g2.vp[node_name_attribute] = vprop_name
g2_vertex_map = dict()
g2_id_map = dict()
addedNodes = set()
for i in range(len(seeds)):
vert = g2.add_vertex()
vprop_name[i] = seeds[i]
g2_vertex_map[seeds[i]] = vert
g2_id_map[vert] = i
addedNodes.add(seeds[i])
allmcedges = []
for mc_edges in mc.edges():
path = mc.ep.path[mc_edges]
allmcedges.extend(path)
j = len(seeds)
allmcedges_g2 = []
for e in allmcedges:
# sourceName = g.vertex_properties["name"][e.source()]
# targetName = g.vertex_properties["name"][e.target()]
sourceName = g.vertex_properties[node_name_attribute][e.source()]
targetName = g.vertex_properties[node_name_attribute][e.target()]
if sourceName not in addedNodes:
vert = g2.add_vertex()
vprop_name[j] = sourceName
g2_vertex_map[sourceName] = vert
g2_id_map[vert] = j
addedNodes.add(sourceName)
j += 1
if targetName not in addedNodes:
vert = g2.add_vertex()
vprop_name[j] = targetName
g2_vertex_map[targetName] = vert
g2_id_map[vert] = j
addedNodes.add(targetName)
j += 1
allmcedges_g2.append(g2.add_edge(g2_vertex_map[sourceName], g2_vertex_map[targetName]))
weights_g2 = g2.new_edge_property("double", val=1.0)
if non_zero_hub_penalty:
for e, e_g2 in zip(allmcedges, allmcedges_g2):
weights_g2[e_g2] = weights[e]
mst2 = gtt.min_spanning_tree(g2, root=None, tree_map=None, weights=weights_g2)
g2.set_edge_filter(mst2)
while True:
noneSteinerLeaves = []
for i in range(g2.num_vertices()):
if g2.vertex(i).out_degree() == 1 and g2.vertex_properties[node_name_attribute][i] not in seeds:
noneSteinerLeaves.append(i)
if len(noneSteinerLeaves) == 0:
break
noneSteinerLeaves = reversed(sorted(noneSteinerLeaves))
for node in noneSteinerLeaves:
try:
g2.remove_edge(g2.edge(g2.vertex(node), g2.get_all_neighbors(node)[0]))
except:
pass
g2.remove_vertex(node)
return g2
def find_bridges(g):
r"""Finds all bridges in a graph."""
global __time
__time = 0
sys.setrecursionlimit(g.num_vertices() + 1)
visited = g.new_vertex_property("boolean", False)
disc = g.new_vertex_property("float", float("inf"))
low = g.new_vertex_property("float", float("inf"))
parent = g.new_vertex_property("int", -1)
is_bridge = g.new_edge_property("boolean", False)
for node in range(g.num_vertices()):
if not visited[node]:
__dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge)
return is_bridge
def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge):
visited[node] = True
global __time
disc[node] = __time
low[node] = __time
__time += 1
for nb in g.get_all_neighbors(node):
if not visited[nb]:
parent[nb] = node
__dfs_find_bridges(g, int(nb), visited, disc, low, parent, is_bridge)
low[node] = min(low[node], low[nb])
if low[nb] > disc[node]:
try:
is_bridge[g.edge(node, nb)] = True
except:
pass
elif int(nb) != parent[node]: #TODO can in theory be removed
low[node] = min(low[node], disc[nb])
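# Note (editor's aside): this is the classic DFS low-link bridge search: an
# edge (u, v) is a bridge exactly when low[v] > disc[u], i.e. no back edge
# from v's DFS subtree reaches u or an ancestor of u.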
def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
if gt.openmp_enabled():
gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True)
scores = defaultdict(lambda:0)
node_name_attribute = 'name'
seed_map = {g.vertex_properties[node_name_attribute][node] :node for node in seed_ids}
seed_ids = list(seed_map.keys())
first_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0)
num_found_trees = 1
tree_edges = []
tree_nodes = set()
for tree_edge in first_tree.edges():
source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]]
target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]]
tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],
gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
tree_nodes.add(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0])
tree_nodes.add(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])
cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges])
returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node
in range(first_tree.num_vertices()))
for vertex in tree_nodes:
scores[vertex] +=1
if num_trees > 1:
is_bridge = find_bridges(g)
edge_filter = g.new_edge_property("boolean", True)
while len(tree_edges) > 0:
tree_edge = tree_edges.pop()
g_edge = g.edge(tree_edge[0], tree_edge[1])
if not is_bridge[g_edge]:
edge_filter[g_edge] = False
g.set_edge_filter(edge_filter)
next_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0)
next_tree_edges = set()
for next_tree_edge in next_tree.edges():
source_name = next_tree.vertex_properties[node_name_attribute][
next_tree.vertex_index[next_tree_edge.source()]]
target_name = next_tree.vertex_properties[node_name_attribute][
next_tree.vertex_index[next_tree_edge.target()]]
tree_nodes.add(
gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0])
tree_nodes.add(
gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])
next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
match=source_name)[0],
gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
match=target_name)[0]))
for vertex in tree_nodes:
scores[vertex] += 1
cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges])
if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0):
num_found_trees += 1
for node in range(next_tree.num_vertices()):
returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
match=next_tree.vertex_properties[node_name_attribute][
node])[0]))
removed_edges = []
for source, target in tree_edges:
if not ((source, target) in next_tree_edges or (target, source) in next_tree_edges):
removed_edges.append((source, target))
for edge in removed_edges:
tree_edges.remove(edge)
g.clear_filters()
edge_filter[g_edge] = True
if num_found_trees >= num_trees:
break
score_prop = g.new_vertex_property("float")
for v,c in scores.items():
score_prop[int(v)]=c
return score_prop
def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
if gt.openmp_enabled():
gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True)
# Call graph-tool to compute TrustRank.
trust = g.new_vertex_property("double")
trust.a[[int(id) for id in seed_ids]] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
# Compute and return the results.
return scores
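# Note (editor's aside): trustrank here is personalized PageRank; the 'pers'
# vector puts mass 1/len(seed_ids) on every seed and 0 elsewhere, so a
# (1 - damping_factor) fraction of the rank is teleported back to the seed
# set at every step.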
def betweenness(g, seed_ids, hub_penalty, weights=None):
if gt.openmp_enabled():
gt.openmp_set_num_threads(6)
weights = edge_weights(g, weights, hub_penalty, inverse=True)
scores = g.new_vertex_property("float")
all_pairs = [(source, target) for source in seed_ids for target in seed_ids if source < target]
for source, target in all_pairs:
local_scores = g.new_vertex_property("float")
num_paths = 0.0
for path in gtt.all_shortest_paths(g, source, target, weights=weights):
local_scores.a[path[1:-1]] += 1
num_paths += 1
if num_paths > 0:
local_scores.a /= num_paths
scores.a += local_scores.a
return scores
from AlgorithmWrapper import AlgorithmWrapper
from graph_tool import Graph
import subprocess, os, re, preprocess
# conda install -c conda-forge graph-tool
#MC:
from configparser import ConfigParser
class DominoWrapper(AlgorithmWrapper):
def __init__(self):
super().__init__()
self.name = 'DOMINO'
self.code = 2
config = ConfigParser()
config.read(self.config)
self.visualization_flag = config.get('domino', 'visualization_flag')
self.output_name = config.get('domino', 'output_name')
self.parallels = config.get('domino', 'para')
self.c = config.get('domino', 'c')
def run_algorithm(self, inputparams):
"""Run Domino algorithm
:param inputparams: list of input parameters, in this case the paths to
the ppi, seed and slices files
:type inputparams: list(str)
:return: list of predicted nodes
:rtype: list(str)
"""
ppi = inputparams[0]
assert os.path.exists(ppi), f"Could not find PPI-network file {ppi}"
seeds = inputparams[1]
assert os.path.exists(seeds), f"Could not find seed file {seeds}"
slices_file = inputparams[2]
#MC:
#CONFIG: visualization_flag = False
command = f'domino -a "{seeds}" -n "{ppi}" -s "{slices_file}" \
-o "{self.output_dir}" -v {self.visualization_flag} -p {self.parallels} --use_cache {self.c}'
run = subprocess.run(command, shell=True, capture_output=True)
match = re.search("( final modules are reported at )(.*)(\n)", run.stdout.decode('utf-8'))
assert match is not None, 'DOMINO could not find any modules'
algo_output = match.group(2)
#MC:
#CONFIG output_name = 'modules.out'
assert os.path.exists(algo_output), f'Could not create output file {algo_output} for domino'
outputfilename = self.name_file('out', 'out')
command = f'mv "{algo_output}" "{os.path.join(self.output_dir, outputfilename)}"'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
algo_output = os.path.join(self.output_dir, outputfilename)
print(f"{self.name} results saved in {algo_output}")
return self.extract_output(algo_output)
def prepare_input(self):
"""prepares the input ppi and seed genes as needed by the DOMINO:
ppi_network in as .sif file of the tabular form: node1 edge_type node2
"""
inputparams = []
# prepare inputfiles
print(f'creating {self.name} input files in {self.output_dir}')
ppi_filename = self.name_file('ppi', 'sif')
ppi_file = os.path.join(self.output_dir, ppi_filename)
seed_filename = self.name_file('seeds')
seed_file = os.path.join(self.output_dir, seed_filename)
# create the ppi_file which contains all edges in the ppi_network
with open(ppi_file, "w") as file:
file.write('node1\tppi\tnode2\n')
edges = list(self.ppi_network.edges())
for edge in edges:
file.write(f"{str(edge.source()) + '_'}\tppi\t{str(edge.target()) + '_'}\n")
# the node ids are suffixed with '_' so that pandas recognizes the vertices as strings
inputparams.append(ppi_file)
print(f'{self.name} ppi is saved in {ppi_file}')
with open(seed_file, "w") as file:
file.write('#node\n')
for seed in self.seeds:
file.write(f"{seed}_\n")
inputparams.append(seed_file)
print(f'{self.name} seeds are saved in {seed_file}')
slices_filename = self.name_file('slices')
slices_output = os.path.join(self.output_dir, slices_filename)
if not os.path.exists(slices_output):
print('creating domino slices_file...')
command = f'slicer --network_file "{ppi_file}" --output_file "{slices_output}"'
subprocess.call(command, shell=True, stdout=subprocess.PIPE)
print(f'{self.name} slices are saved in {slices_output}')
inputparams.append(slices_output)
return inputparams
def extract_output(self, algo_output):
nodes = []
with open(algo_output, "r") as output:
for line in output:
for node in re.findall(r'[0-9A-Za-z]+_', line):
nodes.append(int(node[:-1]))
return nodes
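# Illustration (an assumption inferred from the code above): DOMINO reports
# the '_'-suffixed ids written by prepare_input(), so an output line such as
#   [4790_, 207_]
# is parsed back to [4790, 207] after stripping the suffix.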