Skip to content
Snippets Groups Projects
Commit 35209f31 authored by AndiMajore's avatar AndiMajore
Browse files

all algorithms working for symbol space

Former-commit-id: 356abbba
parent 78e6b792
No related branches found
No related tags found
No related merge requests found
import numpy as np
from tasks.util.read_graph_tool_graph import read_graph_tool_graph from tasks.util.read_graph_tool_graph import read_graph_tool_graph
from tasks.util.scores_to_results import scores_to_results from tasks.util.scores_to_results import scores_to_results
from tasks.util.edge_weights import edge_weights from tasks.util.edge_weights import edge_weights
...@@ -117,7 +118,6 @@ def closeness_centrality(task_hook: TaskHook): ...@@ -117,7 +118,6 @@ def closeness_centrality(task_hook: TaskHook):
# Reasonable default: False. # Reasonable default: False.
# Has no effect unless trust_rank.py is used for ranking drugs. # Has no effect unless trust_rank.py is used for ranking drugs.
include_indirect_drugs = task_hook.parameters.get("include_indirect_drugs", False) include_indirect_drugs = task_hook.parameters.get("include_indirect_drugs", False)
# Type: bool # Type: bool
# Semantics: Specifies whether non-approved drugs should be included in the analysis when ranking drugs. # Semantics: Specifies whether non-approved drugs should be included in the analysis when ranking drugs.
# Example: False. # Example: False.
...@@ -164,7 +164,7 @@ def closeness_centrality(task_hook: TaskHook): ...@@ -164,7 +164,7 @@ def closeness_centrality(task_hook: TaskHook):
pdi_dataset = task_hook.parameters.get("pdi_dataset") pdi_dataset = task_hook.parameters.get("pdi_dataset")
search_target = task_hook.parameters.get("target", "drug-target") search_target = task_hook.parameters.get("target", "drug")
filterPaths = task_hook.parameters.get("filter_paths", True) filterPaths = task_hook.parameters.get("filter_paths", True)
...@@ -173,11 +173,12 @@ def closeness_centrality(task_hook: TaskHook): ...@@ -173,11 +173,12 @@ def closeness_centrality(task_hook: TaskHook):
id_space = task_hook.parameters["config"].get("identifier", "symbol") id_space = task_hook.parameters["config"].get("identifier", "symbol")
node_name_attribute = "internal_id"
filename = f"{id_space}_{ppi_dataset['name']}-{pdi_dataset['name']}" filename = f"{id_space}_{ppi_dataset['name']}-{pdi_dataset['name']}"
if ppi_dataset['licenced'] or pdi_dataset['licenced']: if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced" filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt") filename = os.path.join(task_hook.data_directory, filename + ".gt")
# g, seed_ids, viral_protein_ids, drug_ids = read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits, include_indirect_drugs, include_non_approved_drugs)
g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, id_space, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target) g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, id_space, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
task_hook.set_progress(1 / 4.0, "Computing edge weights.") task_hook.set_progress(1 / 4.0, "Computing edge weights.")
weights = edge_weights(g, hub_penalty) weights = edge_weights(g, hub_penalty)
...@@ -189,12 +190,15 @@ def closeness_centrality(task_hook: TaskHook): ...@@ -189,12 +190,15 @@ def closeness_centrality(task_hook: TaskHook):
# Call graph-tool to compute shortest-path closeness centralities. # Call graph-tool to compute shortest-path closeness centralities.
task_hook.set_progress(2 / 4.0, "Computing shortest path closeness centralities.") task_hook.set_progress(2 / 4.0, "Computing shortest path closeness centralities.")
all_dists = [] all_dists = []
# score_nodes = drug_ids if search_target == 'drug' else seed_ids
for node in seed_ids: for node in seed_ids:
all_dists.append(gtt.shortest_distance(g, node, weights=weights)) dists = gtt.shortest_distance(g, node, weights=weights).get_array()
scores = len(seeds) / sum([dists.get_array() for dists in all_dists]) dists[dists == np.inf] = 99999999999
all_dists.append(dists+1)
scores = len(seed_ids) / (sum([dists for dists in all_dists]))
# Compute and return the results. # Compute and return the results.
task_hook.set_progress(3 / 4.0, "Formating results.") task_hook.set_progress(3 / 4.0, "Formatting results.")
# task_hook.set_results(scores_to_results(strain_or_drugs, result_size, g, seed_ids, viral_protein_ids, drug_ids, scores))
task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filterPaths)) task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filterPaths))
...@@ -79,7 +79,7 @@ def network_proximity(task_hook: TaskHook): ...@@ -79,7 +79,7 @@ def network_proximity(task_hook: TaskHook):
filter_paths = task_hook.parameters.get("filter_paths", True) filter_paths = task_hook.parameters.get("filter_paths", True)
node_name_attribute = "drugstone_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
# Set number of threads if OpenMP support is enabled. # Set number of threads if OpenMP support is enabled.
if gt.openmp_enabled(): if gt.openmp_enabled():
gt.openmp_set_num_threads(num_threads) gt.openmp_set_num_threads(num_threads)
......
from tasks.task_hook import TaskHook from tasks.task_hook import TaskHook
def infer_node_type(node):
    """Guess a node's type from the shape of its identifier.

    This is a heuristic fallback for nodes whose type is not otherwise
    known.

    Args:
        node: Node identifier string.

    Returns:
        ``'drug'`` when the identifier starts with ``'DB'`` and is neither
        6 nor 10 characters long; ``'protein'`` in every other case.
    """
    # TODO: This needs to be improved
    # The length test runs before the prefix test, so a 6- or 10-character
    # identifier is always reported as a protein, even if it starts with
    # 'DB'. (Presumably 6/10 are the UniProt accession lengths -- confirm.)
    if len(node) in (6, 10):
        return 'protein'
    if node.startswith('DB'):
        return 'drug'
    return 'protein'
def quick_task(task_hook: TaskHook): def quick_task(task_hook: TaskHook):
def run_closeness(parameters): def run_closeness(parameters, network):
from .closeness_centrality import closeness_centrality from .closeness_centrality import closeness_centrality
def closeness_progress(progress, status): def closeness_progress(progress, status):
task_hook.set_progress(2 / 3 + 1 / 3 * progress, status) task_hook.set_progress(2 / 3 + 1 / 3 * progress, status)
def closeness_set_result(result): def closeness_set_result(result):
result["network"]["edges"].extend(network["edges"])
task_hook.set_results(result) task_hook.set_results(result)
# Prepare intermediate hook # Prepare intermediate hook
...@@ -31,25 +20,6 @@ def quick_task(task_hook: TaskHook): ...@@ -31,25 +20,6 @@ def quick_task(task_hook: TaskHook):
# Run closeness centrality # Run closeness centrality
closeness_centrality(closeness_task_hook) closeness_centrality(closeness_task_hook)
def run_trust_rank(parameters, seeds):
from .trust_rank import trust_rank
def progress(progress, status):
task_hook.set_progress(2 / 3 + 1 / 3 * progress, status)
def set_result(result):
task_hook.set_results(result)
parameters.update({
"seeds": seeds,
"result_size": 20,
"include_non_approved_drugs": True,
"include_indirect_drugs": False,
"target":"drug"
})
tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result)
trust_rank(tr_task_hook)
def run_multi_steiner(parameters): def run_multi_steiner(parameters):
from .multi_steiner import multi_steiner from .multi_steiner import multi_steiner
...@@ -60,15 +30,20 @@ def quick_task(task_hook: TaskHook): ...@@ -60,15 +30,20 @@ def quick_task(task_hook: TaskHook):
def ms_set_result(result): def ms_set_result(result):
node_attributes = result.get("node_attributes", {}) node_attributes = result.get("node_attributes", {})
node_types = node_attributes.get("node_types", {}) node_types = node_attributes.get("node_types", {})
# seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'host' or seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'protein']
# (not node_types.get(seed) and infer_node_type(seed) == 'host')]
seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'protein' or
(not node_types.get(seed) and infer_node_type(seed) == 'protein')]
if len(seeds) == 0: if len(seeds) == 0:
task_hook.set_results({"network": {"nodes": [], "edges": []}}) task_hook.set_results({"network": {"nodes": [], "edges": []}})
return return
run_trust_rank(parameters, seeds) parameters.update({
"seeds": seeds,
"result_size": 10,
"hub_penalty": 1,
"target": "drug",
"include_non_approved_drugs": True
})
run_closeness(parameters, result["network"])
parameters["num_trees"] = 1 parameters["num_trees"] = 1
parameters["hub_penalty"] = 1 parameters["hub_penalty"] = 1
......
...@@ -218,8 +218,8 @@ def trust_rank(task_hook: TaskHook): ...@@ -218,8 +218,8 @@ def trust_rank(task_hook: TaskHook):
trust = g.new_vertex_property("double") trust = g.new_vertex_property("double")
trust.a[seed_ids] = 1.0 / len(seed_ids) trust.a[seed_ids] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights) scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
# Compute and return the results. # Compute and return the results.
task_hook.set_progress(3 / 4.0, "Formating results.") task_hook.set_progress(3 / 4.0, "Formating results.")
# Convert results to useful output and save it # Convert results to useful output and save it
task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filter_paths)) results = scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filter_paths)
task_hook.set_results(results)
...@@ -51,7 +51,6 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_ ...@@ -51,7 +51,6 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
# Remove all unconnected nodes TODO probably already skip when creating .gt files # Remove all unconnected nodes TODO probably already skip when creating .gt files
if g.vertex(node).out_degree() == 0 and target == 'drug': if g.vertex(node).out_degree() == 0 and target == 'drug':
deleted_nodes.append(node) deleted_nodes.append(node)
# if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and ( elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and (
g.vertex(node).out_degree() > max_deg): g.vertex(node).out_degree() > max_deg):
deleted_nodes.append(node) deleted_nodes.append(node)
...@@ -74,8 +73,8 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_ ...@@ -74,8 +73,8 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
if include_non_approved_drugs: if include_non_approved_drugs:
drug_ids.append(node) drug_ids.append(node)
else: else:
drug_groups = g.vertex_properties["status"][node].split(', ') # drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in drug_groups: if "approved" in g.vertex_properties["status"][node]:
drug_ids.append(node) drug_ids.append(node)
# Delete edges that should be ignored or are not contained in the selected dataset. # Delete edges that should be ignored or are not contained in the selected dataset.
...@@ -111,26 +110,26 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_ ...@@ -111,26 +110,26 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
if indir_drug and int(edge.target()) in drug_ids: if indir_drug and int(edge.target()) in drug_ids:
drug_ids.remove(int(edge.target())) drug_ids.remove(int(edge.target()))
elif g.vertex_properties["type"][ elif g.vertex_properties["type"][edge.source()] == d_type and \
edge.source()] == d_type and edge.source() not in direct_drugs or edge.target() not in seed_ids: edge.source() not in direct_drugs or edge.target() not in seed_ids:
indir_drug = edge.source() not in direct_drugs indir_drug = edge.source() not in direct_drugs
not_seed = edge.target() not in seed_ids not_seed = edge.target() not in seed_ids
if indir_drug or not_seed: if indir_drug or not_seed:
deleted_edges.append(edge) deleted_edges.append(edge)
if indir_drug and int(edge.source()) in drug_ids: if indir_drug and int(edge.source()) in drug_ids:
drug_ids.remove(int(edge.source())) drug_ids.remove(int(edge.source()))
else: # else:
deleted_edges.append(edge) # deleted_edges.append(edge)
g.set_fast_edge_removal(fast=True) g.set_fast_edge_removal(fast=True)
for edge in deleted_edges: for edge in deleted_edges:
g.remove_edge(edge) g.remove_edge(edge)
g.set_fast_edge_removal(fast=False) g.set_fast_edge_removal(fast=False)
vertices = 0 # vertices = 0
for _ in g.vertices(): # for _ in g.vertices():
vertices += 1 # vertices += 1
edges = 0 # edges = 0
for _ in g.edges(): # for _ in g.edges():
edges += 1 # edges += 1
# Return the graph and the indices of the seed_ids and the seeds. # Return the graph and the indices of the seed_ids and the seeds.
return g, list(seed_ids.keys()), drug_ids return g, list(seed_ids.keys()), drug_ids
...@@ -16,8 +16,6 @@ def scores_to_results( ...@@ -16,8 +16,6 @@ def scores_to_results(
r"""Transforms the scores to the required result format.""" r"""Transforms the scores to the required result format."""
node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
candidates = []
# if strain_or_drugs == "drugs":
if target == "drug": if target == "drug":
candidates = [(node, scores[node]) for node in drug_ids if scores[node] > 0] candidates = [(node, scores[node]) for node in drug_ids if scores[node] > 0]
else: else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment