From b6378426311739b2aad0fb18b2cebe51e7ef0da9 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Tue, 26 Jul 2022 16:25:58 +0200
Subject: [PATCH] fixed must and kpm; removed prints

Former-commit-id: 67306ed60f13b0702c2c6ed812f0fc18140505f0 [formerly 97b6706ddca64fc407e2639e1c182a6bc5ae78b9]
Former-commit-id: 7f2eb37d1159e20af4a4bb18c7425a53c4a57d7d
---
 tasks/keypathwayminer_task.py       | 23 ++++++++++++++----
 tasks/multi_steiner.py              | 36 ++++-------------------------
 tasks/trust_rank.py                 |  1 -
 tasks/util/read_graph_tool_graph.py |  2 --
 tasks/util/steiner_tree.py          |  6 -----
 5 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/tasks/keypathwayminer_task.py b/tasks/keypathwayminer_task.py
index 54080de..639d589 100755
--- a/tasks/keypathwayminer_task.py
+++ b/tasks/keypathwayminer_task.py
@@ -1,5 +1,6 @@
 import base64
 import datetime
+import itertools
 import json
 import random
 import string
@@ -199,18 +200,32 @@ def kpm_task(task_hook: TaskHook):
     # Remapping everything from UniProt Accession numbers to internal IDs
     result_nodes = Protein.objects.filter(uniprot_code__in=network["nodes"])
     node_map = {}
+    node_map_for_edges = {}
+
     for node in result_nodes:
-        node_map[node.uniprot_code] = node.id
-    network["nodes"] = list(map(lambda uniprot: "p" + str(node_map[uniprot]), network["nodes"]))
+        node_map_for_edges[node.uniprot_code] = node.id
+        if id_space == 'symbol':
+            node_map[node.uniprot_code] = [node.gene]
+        if id_space == 'entrez':
+            node_map[node.uniprot_code] = [node.entrez]
+        if id_space == 'uniprot':
+            node_map[node.uniprot_code] = [node.uniprot_code]
+        if id_space == 'ensembl':
+            node_map[node.uniprot_code] = [ensg.name for ensg in EnsemblGene.objects.filter(protein_id=node.id)]
+
+    flat_map = lambda f, xs: [y for ys in xs for y in f(ys)]
+
+    network["nodes"] = flat_map(lambda uniprot: node_map[uniprot], network["nodes"])
     network["edges"] = list(map(
-        lambda uniprot_edge: {"from": "p" + str(node_map[uniprot_edge["from"]]),
-                              "to": "p" + str(node_map[uniprot_edge["to"]])},
+        lambda uniprot_edge: {"from": "p" + str(node_map_for_edges[uniprot_edge["from"]]),
+                              "to": "p" + str(node_map_for_edges[uniprot_edge["to"]])},
         network["edges"]))
     node_types = {node: "protein" for node in network["nodes"]}
     is_seed = {node: node in set(map(lambda p: "p"+str(p),protein_backend_ids)) for node in network["nodes"]}
     result_dict = {
         "network": network,
+        "target_nodes":[node for node in network["nodes"] if node not in task_hook.seeds],
         "node_attributes": {"node_types": node_types, "is_seed": is_seed}
     }
     task_hook.set_results(results=result_dict)
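Note on the keypathwayminer_task.py hunk above: one UniProt accession can map to several identifiers in the requested id space (e.g. multiple Ensembl genes), so the node list is flattened with a flat_map instead of a plain map, while the edges keep using the one-to-one internal-ID map. A minimal sketch of the pattern with hypothetical example data (the real node_map is built from the Protein/EnsemblGene models):

    node_map = {"P12345": ["ENSG01", "ENSG02"], "Q67890": ["ENSG03"]}  # hypothetical mapping
    flat_map = lambda f, xs: [y for ys in xs for y in f(ys)]
    nodes = flat_map(lambda uniprot: node_map[uniprot], ["P12345", "Q67890"])
    # nodes == ["ENSG01", "ENSG02", "ENSG03"]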
diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py
index f361168..cc479ad 100755
--- a/tasks/multi_steiner.py
+++ b/tasks/multi_steiner.py
@@ -113,74 +113,48 @@ def multi_steiner(task_hook: TaskHook):
         filename += "_licenced"
     filename = os.path.join(task_hook.data_directory, filename + ".gt")
     g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
-    # seed_map = {g.vertex_properties["name"][node]: node for node in seed_ids}
     seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids}
     task_hook.set_progress(1 / (float(num_trees + 3)), "Computing edge weights.")
     weights = edge_weights(g, hub_penalty)
     # Find first steiner trees
     seeds = list(filter(lambda s: s in seed_map, seeds))
-    print(seeds)
-    print(seed_ids)
     task_hook.set_progress(2 / (float(num_trees + 3)), "Computing Steiner tree 1 of {}.".format(num_trees))
     first_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0)
     num_found_trees = 1
     tree_edges = []
     for tree_edge in first_tree.edges():
-        # source_name = first_tree.vertex_properties["name"][first_tree.vertex_index[tree_edge.source()]]
-        # target_name = first_tree.vertex_properties["name"][first_tree.vertex_index[tree_edge.target()]]
-        # tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0]))
        source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]]
        target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]]
        tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
     cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges])
-    # returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=first_tree.vertex_properties["name"][node])[0]) for node in range(first_tree.num_vertices()))
-    print(f"Before gtu: Costs={cost_first_tree}")
     returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices()))
-    print(f"After gtu: {returned_nodes}")
-    print(num_trees)
     if num_trees > 1:
-        print("num_trees > 1")
         is_bridge = find_bridges(g)
-        print("found bridges")
         edge_filter = g.new_edge_property("boolean", True)
-        print("filtered edges")
         found_new_tree = True
         while len(tree_edges) > 0:
-            print(f"Tree edges length: {len(tree_edges)}")
             if found_new_tree:
                 task_hook.set_progress(float(num_found_trees + 2) / (float(num_trees + 3)), "Computing Steiner tree {} of {}.".format(num_found_trees + 1, num_trees))
             found_new_tree = False
             tree_edge = tree_edges.pop()
-            print("1")
             g_edge = g.edge(tree_edge[0], tree_edge[1])
             if not is_bridge[g_edge]:
-                print("2")
                 edge_filter[g_edge] = False
                 g.set_edge_filter(edge_filter)
                 next_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0)
-                print("3")
                 next_tree_edges = set()
                 for next_tree_edge in next_tree.edges():
-                    # source_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.source()]]
-                    # target_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.target()]]
-                    # next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0]))
-                    print("4")
                     source_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.source()]]
                     target_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.target()]]
                     next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
                 cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges])
                 if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0):
-                    print("5")
                     found_new_tree = True
                     num_found_trees += 1
                     for node in range(next_tree.num_vertices()):
-                        print("GTU again")
-                        # returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=next_tree.vertex_properties["name"][node])[0]))
                         returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=next_tree.vertex_properties[node_name_attribute][node])[0]))
-                    print("GTU done")
                 removed_edges = []
-                print("6")
                 for source, target in tree_edges:
                     if not ((source, target) in set(next_tree_edges)) or ((target, source) in set(next_tree_edges)):
                         removed_edges.append((source, target))
@@ -190,18 +164,15 @@ def multi_steiner(task_hook: TaskHook):
                 edge_filter[g_edge] = True
             if num_found_trees >= num_trees:
                 break
-
     task_hook.set_progress((float(num_trees + 2)) / (float(num_trees + 3)), "Formatting results")
     returned_edges = []
     for node in returned_nodes:
         for neighbor in g.get_all_neighbors(node):
             if int(neighbor) > node and int(neighbor) in returned_nodes:
                 returned_edges.append((node, int(neighbor)))
-    # subgraph = {"nodes": [g.vertex_properties["name"][node] for node in returned_nodes],
-    #             "edges": [{"from": g.vertex_properties["name"][source], "to": g.vertex_properties["name"][target]} for source, target in returned_edges]}
-    # node_types = {g.vertex_properties["name"][node]: g.vertex_properties["type"][node] for node in returned_nodes}
-    # is_seed = {g.vertex_properties["name"][node]: node in set(seed_ids) for node in returned_nodes}
-    subgraph = {"nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes],
+
+    accepted_nodes = [g.vertex_properties[node_name_attribute][node] for node in returned_nodes]
+    subgraph = {"nodes": accepted_nodes,
                 "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges]}
     node_types = {g.vertex_properties[node_name_attribute][node]: g.vertex_properties["type"][node] for node in returned_nodes}
@@ -209,6 +180,7 @@ def multi_steiner(task_hook: TaskHook):
     task_hook.set_results({
         "network": subgraph,
        "node_attributes": {"node_types": node_types, "is_seed": is_seed},
+        "target_nodes":accepted_nodes,
         'gene_interaction_dataset': ppi_dataset,
         'drug_interaction_dataset': pdi_dataset
     })
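Note on the multi_steiner.py hunks above: alternative Steiner trees are only accepted when their total edge cost stays within the configured percentage tolerance of the first tree, and the nodes of all accepted trees are now also returned as target_nodes. A small sketch of the acceptance rule used in the loop (function name hypothetical, condition taken from the code):

    def within_tolerance(cost_first_tree, cost_next_tree, tolerance):
        # tolerance is given in percent, e.g. 5 allows a tree that is at most 5% more expensive
        return cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0)

    print(within_tolerance(10.0, 10.5, 5))  # True, 10.5 <= 10.5
    print(within_tolerance(10.0, 11.0, 5))  # False, 11.0 > 10.5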
diff --git a/tasks/trust_rank.py b/tasks/trust_rank.py
index fbcb5cc..ec75373 100755
--- a/tasks/trust_rank.py
+++ b/tasks/trust_rank.py
@@ -216,7 +216,6 @@ def trust_rank(task_hook: TaskHook):
     # Call graph-tool to compute TrustRank.
     task_hook.set_progress(2 / 4.0, "Computing TrustRank.")
     trust = g.new_vertex_property("double")
-    print(seed_ids)
     trust.a[seed_ids] = 1.0 / len(seed_ids)
     scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
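Note on the trust_rank.py hunk above: apart from the removed print, the computation personalizes PageRank with a trust vector that puts equal mass 1/len(seed_ids) on every seed and zero elsewhere. A library-free sketch of building such a vector (illustrative only, not the graph-tool property map used in the task):

    def seed_personalization(num_nodes, seed_ids):
        # uniform restart probability over the seeds, zero for all other nodes
        trust = [0.0] * num_nodes
        for seed in seed_ids:
            trust[seed] = 1.0 / len(seed_ids)
        return trust

    print(seed_personalization(5, [0, 3]))  # [0.5, 0.0, 0.0, 0.5, 0.0]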
- print(f"loading {file_path} for {target}") g = gt.load_graph(file_path) - # g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms # drug_protein = "DrugHasTarget" d_type = "drug" diff --git a/tasks/util/steiner_tree.py b/tasks/util/steiner_tree.py index f91f647..b697243 100755 --- a/tasks/util/steiner_tree.py +++ b/tasks/util/steiner_tree.py @@ -4,7 +4,6 @@ import itertools as it def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): - node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute mc = gt.Graph(directed=False) eprop_dist = mc.new_edge_property("int") @@ -80,10 +79,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): weights_g2[e_g2] = weights[e] mst2 = gtt.min_spanning_tree(g2, root=None, tree_map=None, weights=weights_g2) g2.set_edge_filter(mst2) - # vw = gt.GraphView(g2, efilt=mst2) - # g3 = Graph(vw, prune=True) - - # g3 = Graph(g22) while True: noneSteinerLeaves = [] @@ -94,7 +89,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty): break noneSteinerLeaves = reversed(sorted(noneSteinerLeaves)) for node in noneSteinerLeaves: - # outarray = g3.get_out_edges(node) g2.remove_edge(g2.edge(g2.vertex(node), g2.get_all_neighbors(node)[0])) g2.remove_vertex(node) -- GitLab