Skip to content
Snippets Groups Projects
Commit 9ff6a771 authored by AndiMajore's avatar AndiMajore
Browse files

fixed must and kpm; removed prints

Former-commit-id: 03b7124d
parent e44fee44
No related branches found
No related tags found
No related merge requests found
import base64 import base64
import datetime import datetime
import itertools
import json import json
import random import random
import string import string
...@@ -199,18 +200,32 @@ def kpm_task(task_hook: TaskHook): ...@@ -199,18 +200,32 @@ def kpm_task(task_hook: TaskHook):
# Remapping everything from UniProt Accession numbers to internal IDs # Remapping everything from UniProt Accession numbers to internal IDs
result_nodes = Protein.objects.filter(uniprot_code__in=network["nodes"]) result_nodes = Protein.objects.filter(uniprot_code__in=network["nodes"])
node_map = {} node_map = {}
node_map_for_edges = {}
for node in result_nodes: for node in result_nodes:
node_map[node.uniprot_code] = node.id node_map_for_edges[node.uniprot_code] = node.id
network["nodes"] = list(map(lambda uniprot: "p" + str(node_map[uniprot]), network["nodes"])) if id_space == 'symbol':
node_map[node.uniprot_code] = [node.gene]
if id_space == 'entrez':
node_map[node.uniprot_code] = [node.entrez]
if id_space == 'uniprot':
node_map[node.uniprot_code] = [node.uniprot_code]
if id_space == 'ensembl':
node_map[node.uniprot_code] = [ensg.name for ensg in EnsemblGene.objects.filter(protein_id=node.id)]
flat_map = lambda f, xs: [y for ys in xs for y in f(ys)]
network["nodes"] = flat_map(lambda uniprot: node_map[uniprot], network["nodes"])
network["edges"] = list(map( network["edges"] = list(map(
lambda uniprot_edge: {"from": "p" + str(node_map[uniprot_edge["from"]]), lambda uniprot_edge: {"from": "p" + str(node_map_for_edges[uniprot_edge["from"]]),
"to": "p" + str(node_map[uniprot_edge["to"]])}, "to": "p" + str(node_map_for_edges[uniprot_edge["to"]])},
network["edges"])) network["edges"]))
node_types = {node: "protein" for node in network["nodes"]} node_types = {node: "protein" for node in network["nodes"]}
is_seed = {node: node in set(map(lambda p: "p"+str(p),protein_backend_ids)) for node in network["nodes"]} is_seed = {node: node in set(map(lambda p: "p"+str(p),protein_backend_ids)) for node in network["nodes"]}
result_dict = { result_dict = {
"network": network, "network": network,
"target_nodes":[node for node in network["nodes"] if node not in task_hook.seeds],
"node_attributes": {"node_types": node_types, "is_seed": is_seed} "node_attributes": {"node_types": node_types, "is_seed": is_seed}
} }
task_hook.set_results(results=result_dict) task_hook.set_results(results=result_dict)
...@@ -113,74 +113,48 @@ def multi_steiner(task_hook: TaskHook): ...@@ -113,74 +113,48 @@ def multi_steiner(task_hook: TaskHook):
filename += "_licenced" filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt") filename = os.path.join(task_hook.data_directory, filename + ".gt")
g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target) g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
# seed_map = {g.vertex_properties["name"][node]: node for node in seed_ids}
seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids} seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids}
task_hook.set_progress(1 / (float(num_trees + 3)), "Computing edge weights.") task_hook.set_progress(1 / (float(num_trees + 3)), "Computing edge weights.")
weights = edge_weights(g, hub_penalty) weights = edge_weights(g, hub_penalty)
# Find first steiner trees # Find first steiner trees
seeds = list(filter(lambda s: s in seed_map, seeds)) seeds = list(filter(lambda s: s in seed_map, seeds))
print(seeds)
print(seed_ids)
task_hook.set_progress(2 / (float(num_trees + 3)), "Computing Steiner tree 1 of {}.".format(num_trees)) task_hook.set_progress(2 / (float(num_trees + 3)), "Computing Steiner tree 1 of {}.".format(num_trees))
first_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0) first_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0)
num_found_trees = 1 num_found_trees = 1
tree_edges = [] tree_edges = []
for tree_edge in first_tree.edges(): for tree_edge in first_tree.edges():
# source_name = first_tree.vertex_properties["name"][first_tree.vertex_index[tree_edge.source()]]
# target_name = first_tree.vertex_properties["name"][first_tree.vertex_index[tree_edge.target()]]
# tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0]))
source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]] source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]]
target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]] target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]]
tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])) tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges]) cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges])
# returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=first_tree.vertex_properties["name"][node])[0]) for node in range(first_tree.num_vertices()))
print(f"Before gtu: Costs={cost_first_tree}")
returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices())) returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices()))
print(f"After gtu: {returned_nodes}")
print(num_trees)
if num_trees > 1: if num_trees > 1:
print("num_trees > 1")
is_bridge = find_bridges(g) is_bridge = find_bridges(g)
print("found bridges")
edge_filter = g.new_edge_property("boolean", True) edge_filter = g.new_edge_property("boolean", True)
print("filtered edges")
found_new_tree = True found_new_tree = True
while len(tree_edges) > 0: while len(tree_edges) > 0:
print(f"Tree edges length: {len(tree_edges)}")
if found_new_tree: if found_new_tree:
task_hook.set_progress(float(num_found_trees + 2) / (float(num_trees + 3)), "Computing Steiner tree {} of {}.".format(num_found_trees + 1, num_trees)) task_hook.set_progress(float(num_found_trees + 2) / (float(num_trees + 3)), "Computing Steiner tree {} of {}.".format(num_found_trees + 1, num_trees))
found_new_tree = False found_new_tree = False
tree_edge = tree_edges.pop() tree_edge = tree_edges.pop()
print("1")
g_edge = g.edge(tree_edge[0], tree_edge[1]) g_edge = g.edge(tree_edge[0], tree_edge[1])
if not is_bridge[g_edge]: if not is_bridge[g_edge]:
print("2")
edge_filter[g_edge] = False edge_filter[g_edge] = False
g.set_edge_filter(edge_filter) g.set_edge_filter(edge_filter)
next_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0) next_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0)
print("3")
next_tree_edges = set() next_tree_edges = set()
for next_tree_edge in next_tree.edges(): for next_tree_edge in next_tree.edges():
# source_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.source()]]
# target_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.target()]]
# next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0]))
print("4")
source_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.source()]] source_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.source()]]
target_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.target()]] target_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.target()]]
next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])) next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges]) cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges])
if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0): if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0):
print("5")
found_new_tree = True found_new_tree = True
num_found_trees += 1 num_found_trees += 1
for node in range(next_tree.num_vertices()): for node in range(next_tree.num_vertices()):
print("GTU again")
# returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=next_tree.vertex_properties["name"][node])[0]))
returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],match=next_tree.vertex_properties[node_name_attribute][node])[0])) returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],match=next_tree.vertex_properties[node_name_attribute][node])[0]))
print("GTU done")
removed_edges = [] removed_edges = []
print("6")
for source, target in tree_edges: for source, target in tree_edges:
if not ((source, target) in set(next_tree_edges)) or ((target, source) in set(next_tree_edges)): if not ((source, target) in set(next_tree_edges)) or ((target, source) in set(next_tree_edges)):
removed_edges.append((source, target)) removed_edges.append((source, target))
...@@ -190,18 +164,15 @@ def multi_steiner(task_hook: TaskHook): ...@@ -190,18 +164,15 @@ def multi_steiner(task_hook: TaskHook):
edge_filter[g_edge] = True edge_filter[g_edge] = True
if num_found_trees >= num_trees: if num_found_trees >= num_trees:
break break
task_hook.set_progress((float(num_trees + 2)) / (float(num_trees + 3)), "Formatting results") task_hook.set_progress((float(num_trees + 2)) / (float(num_trees + 3)), "Formatting results")
returned_edges = [] returned_edges = []
for node in returned_nodes: for node in returned_nodes:
for neighbor in g.get_all_neighbors(node): for neighbor in g.get_all_neighbors(node):
if int(neighbor) > node and int(neighbor) in returned_nodes: if int(neighbor) > node and int(neighbor) in returned_nodes:
returned_edges.append((node, int(neighbor))) returned_edges.append((node, int(neighbor)))
# subgraph = {"nodes": [g.vertex_properties["name"][node] for node in returned_nodes],
# "edges": [{"from": g.vertex_properties["name"][source], "to": g.vertex_properties["name"][target]} for source, target in returned_edges]} accepted_nodes = [g.vertex_properties[node_name_attribute][node] for node in returned_nodes]
# node_types = {g.vertex_properties["name"][node]: g.vertex_properties["type"][node] for node in returned_nodes} subgraph = {"nodes": accepted_nodes,
# is_seed = {g.vertex_properties["name"][node]: node in set(seed_ids) for node in returned_nodes}
subgraph = {"nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes],
"edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for
source, target in returned_edges]} source, target in returned_edges]}
node_types = {g.vertex_properties[node_name_attribute][node]: g.vertex_properties["type"][node] for node in returned_nodes} node_types = {g.vertex_properties[node_name_attribute][node]: g.vertex_properties["type"][node] for node in returned_nodes}
...@@ -209,6 +180,7 @@ def multi_steiner(task_hook: TaskHook): ...@@ -209,6 +180,7 @@ def multi_steiner(task_hook: TaskHook):
task_hook.set_results({ task_hook.set_results({
"network": subgraph, "network": subgraph,
"node_attributes": {"node_types": node_types, "is_seed": is_seed}, "node_attributes": {"node_types": node_types, "is_seed": is_seed},
"taget_nodes":accepted_nodes,
'gene_interaction_dataset': ppi_dataset, 'gene_interaction_dataset': ppi_dataset,
'drug_interaction_dataset': pdi_dataset 'drug_interaction_dataset': pdi_dataset
}) })
...@@ -216,7 +216,6 @@ def trust_rank(task_hook: TaskHook): ...@@ -216,7 +216,6 @@ def trust_rank(task_hook: TaskHook):
# Call graph-tool to compute TrustRank. # Call graph-tool to compute TrustRank.
task_hook.set_progress(2 / 4.0, "Computing TrustRank.") task_hook.set_progress(2 / 4.0, "Computing TrustRank.")
trust = g.new_vertex_property("double") trust = g.new_vertex_property("double")
print(seed_ids)
trust.a[seed_ids] = 1.0 / len(seed_ids) trust.a[seed_ids] = 1.0 / len(seed_ids)
scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights) scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
......
...@@ -40,9 +40,7 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_ ...@@ -40,9 +40,7 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
The graph indices for all drug nodes The graph indices for all drug nodes
""" """
# Read the graph. # Read the graph.
print(f"loading {file_path} for {target}")
g = gt.load_graph(file_path) g = gt.load_graph(file_path)
# g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms
# drug_protein = "DrugHasTarget" # drug_protein = "DrugHasTarget"
d_type = "drug" d_type = "drug"
......
...@@ -4,7 +4,6 @@ import itertools as it ...@@ -4,7 +4,6 @@ import itertools as it
def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
mc = gt.Graph(directed=False) mc = gt.Graph(directed=False)
eprop_dist = mc.new_edge_property("int") eprop_dist = mc.new_edge_property("int")
...@@ -80,10 +79,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): ...@@ -80,10 +79,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
weights_g2[e_g2] = weights[e] weights_g2[e_g2] = weights[e]
mst2 = gtt.min_spanning_tree(g2, root=None, tree_map=None, weights=weights_g2) mst2 = gtt.min_spanning_tree(g2, root=None, tree_map=None, weights=weights_g2)
g2.set_edge_filter(mst2) g2.set_edge_filter(mst2)
# vw = gt.GraphView(g2, efilt=mst2)
# g3 = Graph(vw, prune=True)
# g3 = Graph(g22)
while True: while True:
noneSteinerLeaves = [] noneSteinerLeaves = []
...@@ -94,7 +89,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): ...@@ -94,7 +89,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
break break
noneSteinerLeaves = reversed(sorted(noneSteinerLeaves)) noneSteinerLeaves = reversed(sorted(noneSteinerLeaves))
for node in noneSteinerLeaves: for node in noneSteinerLeaves:
# outarray = g3.get_out_edges(node)
g2.remove_edge(g2.edge(g2.vertex(node), g2.get_all_neighbors(node)[0])) g2.remove_edge(g2.edge(g2.vertex(node), g2.get_all_neighbors(node)[0]))
g2.remove_vertex(node) g2.remove_vertex(node)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment