From 20414717e29b6a83066bd1221a65296d057c4bd0 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Tue, 26 Jul 2022 16:25:58 +0200
Subject: [PATCH] fixed must and kpm; removed prints

---
 tasks/keypathwayminer_task.py       | 23 ++++++++++++++----
 tasks/multi_steiner.py              | 36 ++++-------------------------
 tasks/trust_rank.py                 |  1 -
 tasks/util/read_graph_tool_graph.py |  2 --
 tasks/util/steiner_tree.py          |  6 -----
 5 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/tasks/keypathwayminer_task.py b/tasks/keypathwayminer_task.py
index 54080de..639d589 100755
--- a/tasks/keypathwayminer_task.py
+++ b/tasks/keypathwayminer_task.py
@@ -1,5 +1,6 @@
 import base64
 import datetime
+import itertools
 import json
 import random
 import string
@@ -199,18 +200,32 @@ def kpm_task(task_hook: TaskHook):
     # Remapping everything from UniProt Accession numbers to internal IDs
     result_nodes = Protein.objects.filter(uniprot_code__in=network["nodes"])
     node_map = {}
+    node_map_for_edges = {}
+
     for node in result_nodes:
-        node_map[node.uniprot_code] = node.id
-    network["nodes"] = list(map(lambda uniprot: "p" + str(node_map[uniprot]), network["nodes"]))
+        node_map_for_edges[node.uniprot_code] = node.id
+        if id_space == 'symbol':
+            node_map[node.uniprot_code] = [node.gene]
+        if id_space == 'entrez':
+            node_map[node.uniprot_code] = [node.entrez]
+        if id_space == 'uniprot':
+            node_map[node.uniprot_code] = [node.uniprot_code]
+        if id_space == 'ensembl':
+            node_map[node.uniprot_code] = [ensg.name for ensg in EnsemblGene.objects.filter(protein_id=node.id)]
+
+    flat_map = lambda f, xs: [y for ys in xs for y in f(ys)]
+
+    network["nodes"] = flat_map(lambda uniprot: node_map[uniprot], network["nodes"])
     network["edges"] = list(map(
-        lambda uniprot_edge: {"from": "p" + str(node_map[uniprot_edge["from"]]),
-                              "to": "p" + str(node_map[uniprot_edge["to"]])},
+        lambda uniprot_edge: {"from": "p" + str(node_map_for_edges[uniprot_edge["from"]]),
+                              "to": "p" + str(node_map_for_edges[uniprot_edge["to"]])},
         network["edges"]))
 
     node_types = {node: "protein" for node in network["nodes"]}
     is_seed = {node: node in set(map(lambda p: "p"+str(p),protein_backend_ids)) for node in network["nodes"]}
     result_dict = {
         "network": network,
+        "target_nodes":[node for node in network["nodes"] if node not in task_hook.seeds],
         "node_attributes": {"node_types": node_types, "is_seed": is_seed}
     }
     task_hook.set_results(results=result_dict)
diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py
index f361168..cc479ad 100755
--- a/tasks/multi_steiner.py
+++ b/tasks/multi_steiner.py
@@ -113,74 +113,48 @@ def multi_steiner(task_hook: TaskHook):
         filename += "_licenced"
     filename = os.path.join(task_hook.data_directory, filename + ".gt")
     g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
-    # seed_map = {g.vertex_properties["name"][node]: node for node in seed_ids}
     seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids}
     task_hook.set_progress(1 / (float(num_trees + 3)), "Computing edge weights.")
     weights = edge_weights(g, hub_penalty)
     
     # Find first steiner trees
     seeds = list(filter(lambda s: s in seed_map, seeds))
-    print(seeds)
-    print(seed_ids)
     task_hook.set_progress(2 / (float(num_trees + 3)), "Computing Steiner tree 1 of {}.".format(num_trees))
     first_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0)
     num_found_trees = 1
     tree_edges = []
     for tree_edge in first_tree.edges():
-        # source_name = first_tree.vertex_properties["name"][first_tree.vertex_index[tree_edge.source()]]
-        # target_name = first_tree.vertex_properties["name"][first_tree.vertex_index[tree_edge.target()]]
-        # tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0]))
         source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]]
         target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]]
         tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
     cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges])
-    # returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=first_tree.vertex_properties["name"][node])[0]) for node in range(first_tree.num_vertices()))
-    print(f"Before gtu: Costs={cost_first_tree}")
     returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices()))
-    print(f"After gtu: {returned_nodes}")
-    print(num_trees)
     if num_trees > 1:
-        print("num_trees > 1")
         is_bridge = find_bridges(g)
-        print("found bridges")
         edge_filter = g.new_edge_property("boolean", True)
-        print("filtered edges")
         found_new_tree = True
         while len(tree_edges) > 0:
-            print(f"Tree edges length: {len(tree_edges)}")
             if found_new_tree:
                 task_hook.set_progress(float(num_found_trees + 2) / (float(num_trees + 3)), "Computing Steiner tree {} of {}.".format(num_found_trees + 1, num_trees))
             found_new_tree = False
             tree_edge = tree_edges.pop()
-            print("1")
             g_edge = g.edge(tree_edge[0], tree_edge[1])
             if not is_bridge[g_edge]:
-                print("2")
                 edge_filter[g_edge] = False
                 g.set_edge_filter(edge_filter)
                 next_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0)
-                print("3")
                 next_tree_edges = set()
                 for next_tree_edge in next_tree.edges():
-                    # source_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.source()]]
-                    # target_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.target()]]
-                    # next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0]))
-                    print("4")
                     source_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.source()]]
                     target_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.target()]]
                     next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]))
                 cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges])
                 if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0):
-                    print("5")
                     found_new_tree = True
                     num_found_trees += 1
                     for node in range(next_tree.num_vertices()):
-                        print("GTU again")
-                        # returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=next_tree.vertex_properties["name"][node])[0]))
                         returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],match=next_tree.vertex_properties[node_name_attribute][node])[0]))
-                        print("GTU done")
                     removed_edges = []
-                    print("6")
                     for source, target in tree_edges:
                         if not ((source, target) in set(next_tree_edges)) or ((target, source) in set(next_tree_edges)):
                             removed_edges.append((source, target))
@@ -190,18 +164,15 @@ def multi_steiner(task_hook: TaskHook):
                 edge_filter[g_edge] = True
             if num_found_trees >= num_trees:
                 break
-    
     task_hook.set_progress((float(num_trees + 2)) / (float(num_trees + 3)), "Formatting results")
     returned_edges = []
     for node in returned_nodes:
         for neighbor in g.get_all_neighbors(node):
             if int(neighbor) > node and int(neighbor) in returned_nodes:
                 returned_edges.append((node, int(neighbor)))
-    # subgraph = {"nodes": [g.vertex_properties["name"][node] for node in returned_nodes],
-    #             "edges": [{"from": g.vertex_properties["name"][source], "to": g.vertex_properties["name"][target]} for source, target in returned_edges]}
-    # node_types = {g.vertex_properties["name"][node]: g.vertex_properties["type"][node] for node in returned_nodes}
-    # is_seed = {g.vertex_properties["name"][node]: node in set(seed_ids) for node in returned_nodes}
-    subgraph = {"nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes],
+
+    accepted_nodes = [g.vertex_properties[node_name_attribute][node] for node in returned_nodes]
+    subgraph = {"nodes": accepted_nodes,
                 "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for
                           source, target in returned_edges]}
     node_types = {g.vertex_properties[node_name_attribute][node]: g.vertex_properties["type"][node] for node in returned_nodes}
@@ -209,6 +180,7 @@ def multi_steiner(task_hook: TaskHook):
     task_hook.set_results({
         "network": subgraph,
         "node_attributes": {"node_types": node_types, "is_seed": is_seed},
+        "target_nodes": accepted_nodes,
         'gene_interaction_dataset': ppi_dataset,
         'drug_interaction_dataset': pdi_dataset
     })
diff --git a/tasks/trust_rank.py b/tasks/trust_rank.py
index fbcb5cc..ec75373 100755
--- a/tasks/trust_rank.py
+++ b/tasks/trust_rank.py
@@ -216,7 +216,6 @@ def trust_rank(task_hook: TaskHook):
     # Call graph-tool to compute TrustRank.
     task_hook.set_progress(2 / 4.0, "Computing TrustRank.")
     trust = g.new_vertex_property("double")
-    print(seed_ids)
     trust.a[seed_ids] = 1.0 / len(seed_ids)
     scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
     
diff --git a/tasks/util/read_graph_tool_graph.py b/tasks/util/read_graph_tool_graph.py
index 92c1333..9901ae1 100755
--- a/tasks/util/read_graph_tool_graph.py
+++ b/tasks/util/read_graph_tool_graph.py
@@ -40,9 +40,7 @@ def read_graph_tool_graph(file_path, seeds, id_space, max_deg, include_indirect_
       The graph indices for all drug nodes
     """
     # Read the graph.
-    print(f"loading {file_path} for {target}")
     g = gt.load_graph(file_path)
-    # g = gtt.extract_largest_component(gg, directed=False, prune=True)   # this line is added since we need to work with the LCC of the graphs for all algorithms
 
     # drug_protein = "DrugHasTarget"
     d_type = "drug"
diff --git a/tasks/util/steiner_tree.py b/tasks/util/steiner_tree.py
index f91f647..b697243 100755
--- a/tasks/util/steiner_tree.py
+++ b/tasks/util/steiner_tree.py
@@ -4,7 +4,6 @@ import itertools as it
 
 
 def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
-
     node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
     mc = gt.Graph(directed=False)
     eprop_dist = mc.new_edge_property("int")
@@ -80,10 +79,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
             weights_g2[e_g2] = weights[e]
     mst2 = gtt.min_spanning_tree(g2, root=None, tree_map=None, weights=weights_g2)
     g2.set_edge_filter(mst2)
-    # vw = gt.GraphView(g2, efilt=mst2)
-    # g3 = Graph(vw, prune=True)
-
-    # g3 = Graph(g22)
 
     while True:
         noneSteinerLeaves = []
@@ -94,7 +89,6 @@ def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
             break
         noneSteinerLeaves = reversed(sorted(noneSteinerLeaves))
         for node in noneSteinerLeaves:
-            # outarray = g3.get_out_edges(node)
             g2.remove_edge(g2.edge(g2.vertex(node), g2.get_all_neighbors(node)[0]))
             g2.remove_vertex(node)
             
-- 
GitLab