Skip to content
Snippets Groups Projects
Commit 185bd346 authored by AndiMajore's avatar AndiMajore
Browse files

all algorithms working for symbol space

parent 4329012d
No related branches found
No related tags found
No related merge requests found
Pipeline #12179 failed
import numpy as np
from tasks.util.read_graph_tool_graph import read_graph_tool_graph
from tasks.util.scores_to_results import scores_to_results
from tasks.util.edge_weights import edge_weights
......@@ -117,7 +118,6 @@ def closeness_centrality(task_hook: TaskHook):
# Reasonable default: False.
# Has no effect unless trust_rank.py is used for ranking drugs.
include_indirect_drugs = task_hook.parameters.get("include_indirect_drugs", False)
# Type: bool
# Semantics: Specifies whether non-approved drugs should be included in the analysis when ranking drugs.
# Example: False.
......@@ -164,7 +164,7 @@ def closeness_centrality(task_hook: TaskHook):
pdi_dataset = task_hook.parameters.get("pdi_dataset")
search_target = task_hook.parameters.get("target", "drug-target")
search_target = task_hook.parameters.get("target", "drug")
filterPaths = task_hook.parameters.get("filter_paths", True)
......@@ -173,11 +173,12 @@ def closeness_centrality(task_hook: TaskHook):
id_space = task_hook.parameters["config"].get("identifier", "symbol")
node_name_attribute = "internal_id"
filename = f"{id_space}_{ppi_dataset['name']}-{pdi_dataset['name']}"
if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt")
# g, seed_ids, viral_protein_ids, drug_ids = read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits, include_indirect_drugs, include_non_approved_drugs)
g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, id_space, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
task_hook.set_progress(1 / 4.0, "Computing edge weights.")
weights = edge_weights(g, hub_penalty)
......@@ -189,12 +190,15 @@ def closeness_centrality(task_hook: TaskHook):
# Call graph-tool to compute shortest-path closeness centralities.
task_hook.set_progress(2 / 4.0, "Computing shortest path closeness centralities.")
all_dists = []
# score_nodes = drug_ids if search_target == 'drug' else seed_ids
for node in seed_ids:
all_dists.append(gtt.shortest_distance(g, node, weights=weights))
scores = len(seeds) / sum([dists.get_array() for dists in all_dists])
dists = gtt.shortest_distance(g, node, weights=weights).get_array()
dists[dists == np.inf] = 99999999999
all_dists.append(dists+1)
scores = len(seed_ids) / (sum([dists for dists in all_dists]))
# Compute and return the results.
task_hook.set_progress(3 / 4.0, "Formating results.")
# task_hook.set_results(scores_to_results(strain_or_drugs, result_size, g, seed_ids, viral_protein_ids, drug_ids, scores))
task_hook.set_progress(3 / 4.0, "Formatting results.")
task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filterPaths))
......@@ -79,7 +79,7 @@ def network_proximity(task_hook: TaskHook):
filter_paths = task_hook.parameters.get("filter_paths", True)
node_name_attribute = "drugstone_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
# Set number of threads if OpenMP support is enabled.
if gt.openmp_enabled():
gt.openmp_set_num_threads(num_threads)
......
from tasks.task_hook import TaskHook
def infer_node_type(node):  # TODO: This needs to be improved
    """Guess the type of a node from its identifier string.

    Args:
        node: Node identifier; must support ``len()`` and ``startswith()``.

    Returns:
        ``'drug'`` if the identifier starts with ``'DB'`` and is neither 6 nor
        10 characters long; ``'protein'`` in every other case.
    """
    # The length check runs first, so a 6- or 10-character identifier is
    # reported as a protein even when it starts with 'DB'.
    # NOTE(review): 6/10 presumably match protein accession lengths - confirm.
    if len(node) in (6, 10):
        return 'protein'
    if node.startswith('DB'):
        return 'drug'
    return 'protein'
def quick_task(task_hook: TaskHook):
def run_closeness(parameters):
def run_closeness(parameters, network):
from .closeness_centrality import closeness_centrality
def closeness_progress(progress, status):
task_hook.set_progress(2 / 3 + 1 / 3 * progress, status)
def closeness_set_result(result):
result["network"]["edges"].extend(network["edges"])
task_hook.set_results(result)
# Prepare intermediate hook
......@@ -31,25 +20,6 @@ def quick_task(task_hook: TaskHook):
# Run closeness centrality
closeness_centrality(closeness_task_hook)
def run_trust_rank(parameters, seeds):
from .trust_rank import trust_rank
def progress(progress, status):
task_hook.set_progress(2 / 3 + 1 / 3 * progress, status)
def set_result(result):
task_hook.set_results(result)
parameters.update({
"seeds": seeds,
"result_size": 20,
"include_non_approved_drugs": True,
"include_indirect_drugs": False,
"target":"drug"
})
tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result)
trust_rank(tr_task_hook)
def run_multi_steiner(parameters):
from .multi_steiner import multi_steiner
......@@ -60,15 +30,20 @@ def quick_task(task_hook: TaskHook):
def ms_set_result(result):
node_attributes = result.get("node_attributes", {})
node_types = node_attributes.get("node_types", {})
# seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'host' or
# (not node_types.get(seed) and infer_node_type(seed) == 'host')]
seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'protein' or
(not node_types.get(seed) and infer_node_type(seed) == 'protein')]
seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'protein']
if len(seeds) == 0:
task_hook.set_results({"network": {"nodes": [], "edges": []}})
return
run_trust_rank(parameters, seeds)
parameters.update({
"seeds": seeds,
"result_size": 10,
"hub_penalty": 1,
"target": "drug",
"include_non_approved_drugs": True
})
run_closeness(parameters, result["network"])
parameters["num_trees"] = 1
parameters["hub_penalty"] = 1
......
......@@ -218,8 +218,8 @@ def trust_rank(task_hook: TaskHook):
trust = g.new_vertex_property("double")
trust.a[seed_ids] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
# Compute and return the results.
task_hook.set_progress(3 / 4.0, "Formating results.")
# Convert results to useful output and save it
task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filter_paths))
results = scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filter_paths)
task_hook.set_results(results)
......@@ -51,7 +51,6 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
# Remove all unconnected nodes TODO probably already skip when creating .gt files
if g.vertex(node).out_degree() == 0 and target == 'drug':
deleted_nodes.append(node)
# if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and (
g.vertex(node).out_degree() > max_deg):
deleted_nodes.append(node)
......@@ -74,8 +73,8 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
if include_non_approved_drugs:
drug_ids.append(node)
else:
drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in drug_groups:
# drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in g.vertex_properties["status"][node]:
drug_ids.append(node)
# Delete edges that should be ignored or are not contained in the selected dataset.
......@@ -111,26 +110,26 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
if indir_drug and int(edge.target()) in drug_ids:
drug_ids.remove(int(edge.target()))
elif g.vertex_properties["type"][
edge.source()] == d_type and edge.source() not in direct_drugs or edge.target() not in seed_ids:
elif g.vertex_properties["type"][edge.source()] == d_type and \
edge.source() not in direct_drugs or edge.target() not in seed_ids:
indir_drug = edge.source() not in direct_drugs
not_seed = edge.target() not in seed_ids
if indir_drug or not_seed:
deleted_edges.append(edge)
if indir_drug and int(edge.source()) in drug_ids:
drug_ids.remove(int(edge.source()))
else:
deleted_edges.append(edge)
# else:
# deleted_edges.append(edge)
g.set_fast_edge_removal(fast=True)
for edge in deleted_edges:
g.remove_edge(edge)
g.set_fast_edge_removal(fast=False)
vertices = 0
for _ in g.vertices():
vertices += 1
edges = 0
for _ in g.edges():
edges += 1
# vertices = 0
# for _ in g.vertices():
# vertices += 1
# edges = 0
# for _ in g.edges():
# edges += 1
# Return the graph, the seed vertex ids (keys of seed_ids), and the drug ids.
return g, list(seed_ids.keys()), drug_ids
......@@ -16,8 +16,6 @@ def scores_to_results(
r"""Transforms the scores to the required result format."""
node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
candidates = []
# if strain_or_drugs == "drugs":
if target == "drug":
candidates = [(node, scores[node]) for node in drug_ids if scores[node] > 0]
else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment