Skip to content
Snippets Groups Projects
Commit 56815305 authored by AndiMajore's avatar AndiMajore
Browse files

all algorithms working for symbol space

Former-commit-id: e1e10a4eb4ea5d29f363d583218e58717b4ea711 [formerly 8d18e74f57420d754b5ece6537fe0260468e7445]
Former-commit-id: aa0d2d2290e67a30b44018c517ba073e5c3dd153
parent 65569b98
Branches
No related tags found
No related merge requests found
import numpy as np
from tasks.util.read_graph_tool_graph import read_graph_tool_graph
from tasks.util.scores_to_results import scores_to_results
from tasks.util.edge_weights import edge_weights
......@@ -117,7 +118,6 @@ def closeness_centrality(task_hook: TaskHook):
# Reasonable default: False.
# Has no effect unless trust_rank.py is used for ranking drugs.
include_indirect_drugs = task_hook.parameters.get("include_indirect_drugs", False)
# Type: bool
# Semantics: Specifies whether non-approved drugs should be included in the analysis when ranking drugs. TODO confirm the subject (it is missing in the original comment).
# Example: False.
......@@ -164,7 +164,7 @@ def closeness_centrality(task_hook: TaskHook):
pdi_dataset = task_hook.parameters.get("pdi_dataset")
search_target = task_hook.parameters.get("target", "drug-target")
search_target = task_hook.parameters.get("target", "drug")
filterPaths = task_hook.parameters.get("filter_paths", True)
......@@ -173,11 +173,12 @@ def closeness_centrality(task_hook: TaskHook):
id_space = task_hook.parameters["config"].get("identifier", "symbol")
node_name_attribute = "internal_id"
filename = f"{id_space}_{ppi_dataset['name']}-{pdi_dataset['name']}"
if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt")
# g, seed_ids, viral_protein_ids, drug_ids = read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits, include_indirect_drugs, include_non_approved_drugs)
g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, id_space, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
task_hook.set_progress(1 / 4.0, "Computing edge weights.")
weights = edge_weights(g, hub_penalty)
......@@ -189,12 +190,15 @@ def closeness_centrality(task_hook: TaskHook):
# Call graph-tool to compute shortest-path closeness centralities.
task_hook.set_progress(2 / 4.0, "Computing shortest path closeness centralities.")
all_dists = []
# score_nodes = drug_ids if search_target == 'drug' else seed_ids
for node in seed_ids:
all_dists.append(gtt.shortest_distance(g, node, weights=weights))
scores = len(seeds) / sum([dists.get_array() for dists in all_dists])
dists = gtt.shortest_distance(g, node, weights=weights).get_array()
dists[dists == np.inf] = 99999999999
all_dists.append(dists+1)
scores = len(seed_ids) / (sum([dists for dists in all_dists]))
# Compute and return the results.
task_hook.set_progress(3 / 4.0, "Formating results.")
# task_hook.set_results(scores_to_results(strain_or_drugs, result_size, g, seed_ids, viral_protein_ids, drug_ids, scores))
task_hook.set_progress(3 / 4.0, "Formatting results.")
task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filterPaths))
......@@ -79,7 +79,7 @@ def network_proximity(task_hook: TaskHook):
filter_paths = task_hook.parameters.get("filter_paths", True)
node_name_attribute = "drugstone_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
# Set number of threads if OpenMP support is enabled.
if gt.openmp_enabled():
gt.openmp_set_num_threads(num_threads)
......
from tasks.task_hook import TaskHook
def infer_node_type(node):  # TODO: This needs to be improved
    """Guess a node's type from its identifier string.

    Identifiers of length 6 or 10 are always reported as ``'protein'``
    (presumably the lengths of protein accessions -- confirm against the
    identifier space in use). Any other identifier starting with ``'DB'``
    is reported as ``'drug'``; everything else falls back to ``'protein'``.
    """
    has_protein_length = len(node) in (6, 10)
    # The length heuristic takes precedence over the 'DB' prefix check.
    if not has_protein_length and node.startswith('DB'):
        return 'drug'
    return 'protein'
def quick_task(task_hook: TaskHook):
def run_closeness(parameters):
def run_closeness(parameters, network):
from .closeness_centrality import closeness_centrality
def closeness_progress(progress, status):
task_hook.set_progress(2 / 3 + 1 / 3 * progress, status)
def closeness_set_result(result):
result["network"]["edges"].extend(network["edges"])
task_hook.set_results(result)
# Prepare intermediate hook
......@@ -31,25 +20,6 @@ def quick_task(task_hook: TaskHook):
# Run closeness centrality
closeness_centrality(closeness_task_hook)
def run_trust_rank(parameters, seeds):
from .trust_rank import trust_rank
def progress(progress, status):
task_hook.set_progress(2 / 3 + 1 / 3 * progress, status)
def set_result(result):
task_hook.set_results(result)
parameters.update({
"seeds": seeds,
"result_size": 20,
"include_non_approved_drugs": True,
"include_indirect_drugs": False,
"target":"drug"
})
tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result)
trust_rank(tr_task_hook)
def run_multi_steiner(parameters):
from .multi_steiner import multi_steiner
......@@ -60,15 +30,20 @@ def quick_task(task_hook: TaskHook):
def ms_set_result(result):
node_attributes = result.get("node_attributes", {})
node_types = node_attributes.get("node_types", {})
# seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'host' or
# (not node_types.get(seed) and infer_node_type(seed) == 'host')]
seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'protein' or
(not node_types.get(seed) and infer_node_type(seed) == 'protein')]
seeds = [seed for seed in result["network"]["nodes"] if node_types.get(seed) == 'protein']
if len(seeds) == 0:
task_hook.set_results({"network": {"nodes": [], "edges": []}})
return
run_trust_rank(parameters, seeds)
parameters.update({
"seeds": seeds,
"result_size": 10,
"hub_penalty": 1,
"target": "drug",
"include_non_approved_drugs": True
})
run_closeness(parameters, result["network"])
parameters["num_trees"] = 1
parameters["hub_penalty"] = 1
......
......@@ -218,8 +218,8 @@ def trust_rank(task_hook: TaskHook):
trust = g.new_vertex_property("double")
trust.a[seed_ids] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
# Compute and return the results.
task_hook.set_progress(3 / 4.0, "Formating results.")
# Convert results to useful output and save it
task_hook.set_results(scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filter_paths))
results = scores_to_results(search_target, result_size, g, seed_ids, drug_ids, scores, ppi_dataset, pdi_dataset, filter_paths)
task_hook.set_results(results)
......@@ -51,7 +51,6 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
# Remove all unconnected nodes TODO probably already skip when creating .gt files
if g.vertex(node).out_degree() == 0 and target == 'drug':
deleted_nodes.append(node)
# if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and (
g.vertex(node).out_degree() > max_deg):
deleted_nodes.append(node)
......@@ -74,8 +73,8 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
if include_non_approved_drugs:
drug_ids.append(node)
else:
drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in drug_groups:
# drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in g.vertex_properties["status"][node]:
drug_ids.append(node)
# Delete edges that should be ignored or are not contained in the selected dataset.
......@@ -111,26 +110,26 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
if indir_drug and int(edge.target()) in drug_ids:
drug_ids.remove(int(edge.target()))
elif g.vertex_properties["type"][
edge.source()] == d_type and edge.source() not in direct_drugs or edge.target() not in seed_ids:
elif g.vertex_properties["type"][edge.source()] == d_type and \
edge.source() not in direct_drugs or edge.target() not in seed_ids:
indir_drug = edge.source() not in direct_drugs
not_seed = edge.target() not in seed_ids
if indir_drug or not_seed:
deleted_edges.append(edge)
if indir_drug and int(edge.source()) in drug_ids:
drug_ids.remove(int(edge.source()))
else:
deleted_edges.append(edge)
# else:
# deleted_edges.append(edge)
g.set_fast_edge_removal(fast=True)
for edge in deleted_edges:
g.remove_edge(edge)
g.set_fast_edge_removal(fast=False)
vertices = 0
for _ in g.vertices():
vertices += 1
edges = 0
for _ in g.edges():
edges += 1
# vertices = 0
# for _ in g.vertices():
# vertices += 1
# edges = 0
# for _ in g.edges():
# edges += 1
# Return the graph, the indices of the seed nodes, and the indices of the drug nodes.
return g, list(seed_ids.keys()), drug_ids
......@@ -16,8 +16,6 @@ def scores_to_results(
r"""Transforms the scores to the required result format."""
node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
candidates = []
# if strain_or_drugs == "drugs":
if target == "drug":
candidates = [(node, scores[node]) for node in drug_ids if scores[node] > 0]
else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment