Skip to content
Snippets Groups Projects
Commit f1d418ec authored by AndiMajore's avatar AndiMajore
Browse files

removed old dir

parent 77914cc6
Branches camis
No related tags found
No related merge requests found
import sys
from collections import defaultdict
import graph_tool as gt
from utils.networks import trustrank, betweenness, must
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
    """Build a consensus ("CAMI") vertex set from several tools' results.

    Ranks all putative vertices (union of the tool results minus the seeds)
    on a filtered view of ``ppi_graph`` using the configured ranking method,
    then keeps the vertices scoring above the ``confidence_level`` quantile.

    :param result_sets: dict mapping tool objects (must expose ``.weight``)
        to the set of vertices that tool predicted; modified in place
        (seeds are subtracted from each set)
    :param ppi_graph: graph_tool graph of the full PPI network
    :param seed_lst: list of seed vertices
    :param predicted_by: per-vertex iterable of 0/1 codes, one per tool
    :param cami_scores: NOTE(review): never read in this function — verify
        whether it should feed into the ranking
    :param code2toolname: maps a tool index to its display name
    :param params: options dict: 'damping_factor', 'hub_penalty',
        'confidence_level', optional 'weighted', 'ranking', 'trees',
        'all_nodes'
    :return: tuple (cami_vertices, putative_vertices, codes2tools)
    """
    damping_factor = params['damping_factor']
    hub_penalty = params['hub_penalty']
    confidence_levelentage = params['confidence_level']
    weighted = 'weighted' in params and params['weighted']
    ranking_method = params['ranking'] if 'ranking' in params else 'trustrank'
    trees = params.get('trees', 5)
    all_nodes = params.get('all_nodes', False)
    # calculate gene weights
    # set of all result genes
    cami_vertices = set()
    putative_vertices = set()
    # CONFIG: consensus_threshold = 2
    # parse every result set of each tool
    counts = defaultdict(lambda: 0)
    for tool in result_sets:
        # seeds are inputs, never counted as predictions
        result_sets[tool] -= set(seed_lst)
        for vertex in result_sets[tool]:
            putative_vertices.add(vertex)
            counts[vertex] = counts[vertex] + tool.weight
        # seeds accumulate every tool's weight as well
        for vertex in seed_lst:
            counts[vertex] = counts[vertex] + tool.weight
    # rank only on the subnetwork induced by seeds + putative vertices
    subnet = gt.GraphView(ppi_graph, vfilt=lambda v: v in putative_vertices or v in seed_lst)
    weights = None
    if weighted:
        weights = subnet.new_edge_property("double")
        # NOTE(review): an *edge* property array is written at *vertex*
        # indices here — confirm this is what the ranking helpers expect.
        for v, c in counts.items():
            weights.a[int(v)] = c
    if ranking_method == 'trustrank':
        scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
    elif ranking_method == 'betweenness':
        scores = betweenness(subnet, seed_lst, hub_penalty, weights)
    elif ranking_method == 'must':
        # NOTE(review): num_trees is hard-coded to 5 and `trees` is passed in
        # the tolerance slot — compare with utils.networks.must's signature.
        if all_nodes:
            scores = must(subnet, set(seed_lst).union(putative_vertices), 5, hub_penalty, weights, trees)
        else:
            scores = must(subnet, seed_lst, 5, hub_penalty, weights, trees)
    # NOTE(review): `scores` is unbound (NameError below) if ranking_method
    # matches none of the branches above.
    # threshold = score at the (1 - confidence_level) quantile of putatives
    putative_scores = scores.a[[int(id) for id in putative_vertices]]
    putative_scores.sort()
    threshold = putative_scores[int(len(putative_vertices) * (1 - confidence_levelentage))]
    for v in putative_vertices:
        if scores[v] > threshold:
            cami_vertices.add(v)
    # translate tool code to string
    codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
                   vertex in ppi_graph.vertices()}
    return cami_vertices, putative_vertices, codes2tools
import sys
from collections import defaultdict
from utils.networks import trustrank, betweenness, must
import graph_tool as gt
# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
    """Build a consensus ("CAMI") vertex set from several tools' results.

    For every tool, ranks the vertices of the subnetwork induced by that
    tool's results plus the seeds; per-vertex scores are then summed across
    tools and the vertices above the ``confidence_level`` quantile are kept.

    :param result_sets: dict mapping tool objects (must expose ``.weight``)
        to the set of vertices that tool predicted; modified in place
        (seeds are subtracted from each set)
    :param ppi_graph: graph_tool graph of the full PPI network
    :param seed_lst: list of seed vertices
    :param predicted_by: per-vertex iterable of 0/1 codes, one per tool
    :param cami_scores: NOTE(review): never read in this function
    :param code2toolname: maps a tool index to its display name
    :param params: options dict: 'damping_factor', 'hub_penalty',
        'confidence_level', optional 'weighted', 'ranking', 'trees',
        'all_nodes', 'tolerance'
    :return: tuple (cami_vertices, putative_vertices, codes2tools)
    :raises ValueError: if params['ranking'] names an unknown method
    """
    damping_factor = params['damping_factor']
    hub_penalty = params['hub_penalty']
    confidence_levelentage = params['confidence_level']
    weighted = 'weighted' in params and params['weighted']
    ranking_method = params.get('ranking', 'trustrank')
    trees = params.get('trees', 5)
    all_nodes = params.get('all_nodes', False)
    tolerance = params.get('tolerance', 10)
    # calculate gene weights
    # set of all result genes
    cami_vertices = set()
    putative_vertices = set()
    # parse every result set of each tool
    counts = defaultdict(lambda: 0)
    for tool in result_sets:
        # seeds are inputs, never counted as predictions
        result_sets[tool] -= set(seed_lst)
        for vertex in result_sets[tool]:
            putative_vertices.add(vertex)
            counts[vertex] = counts[vertex] + tool.weight
        # seeds accumulate every tool's weight as well
        for vertex in seed_lst:
            counts[vertex] = counts[vertex] + tool.weight
    tool_scores = dict()
    for tool in result_sets:
        # bind `tool` as a default argument: GraphView evaluates vfilt
        # lazily, so a late-binding closure could see the wrong tool
        subnet = gt.GraphView(ppi_graph,
                              vfilt=lambda v, tool=tool: v in result_sets[tool] or v in seed_lst)
        weights = None
        if weighted:
            weights = subnet.new_edge_property("double")
            for v, c in counts.items():
                weights.a[int(v)] = c
        if ranking_method == 'trustrank':
            scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
        elif ranking_method == 'betweenness':
            scores = betweenness(subnet, seed_lst, hub_penalty, weights)
        elif ranking_method == 'must':
            if all_nodes:
                scores = must(ppi_graph, set(seed_lst).union(putative_vertices), trees, hub_penalty, weights,
                              tolerance)
            else:
                scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance)
        else:
            # fail fast instead of hitting a NameError on `scores` below
            raise ValueError("Unknown ranking method {!r}".format(ranking_method))
        tool_scores[tool] = scores
    # sum each putative vertex's score over all per-tool rankings
    putative_score_map = defaultdict(lambda: 0)
    for scores in tool_scores.values():
        for id in putative_vertices:
            try:
                putative_score_map[id] += scores.a[int(id)]
            # best effort: the vertex may not exist in this tool's
            # subnetwork; previously a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit
            except (IndexError, TypeError, ValueError):
                pass
    putative_scores = sorted(putative_score_map.values())
    if putative_scores:
        # quantile index, clamped so confidence_level == 0 cannot index
        # one past the end of the list
        cut = min(int(len(putative_vertices) * (1 - confidence_levelentage)),
                  len(putative_scores) - 1)
        threshold = putative_scores[cut]
        for v in putative_vertices:
            if putative_score_map[v] > threshold:
                cami_vertices.add(v)
    # translate tool code to string
    codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1] for
                   vertex in ppi_graph.vertices()}
    return cami_vertices, putative_vertices, codes2tools
import sys
from collections import defaultdict
import graph_tool as gt
import graph_tool.centrality as gtc
import graph_tool.stats as gts
import graph_tool.topology as gtt
import graph_tool.util as gtu
import itertools as it
def edge_weights(g, base_weigths, hub_penalty, inverse=False):
    """Build a "double" edge-weight property for *g*.

    Every edge starts at the graph's average total degree; if
    ``base_weigths`` is given, its values are copied over first.  With a
    positive ``hub_penalty`` each edge weight is blended toward the mean
    degree of its two endpoints, penalizing hub-adjacent edges; ``inverse``
    stores the reciprocal (useful as a shortest-path cost).

    :param g: graph_tool graph
    :param base_weigths: optional property whose array is copied in
    :param hub_penalty: blend factor in (0, 1]; <= 0 disables penalization
    :param inverse: store 1/weight instead of the weight itself
    :return: a new "double" edge property map
    :raises ValueError: if hub_penalty > 1
    """
    mean_degree = gts.vertex_average(g, "total")[0]
    weights = g.new_edge_property("double", val=mean_degree)
    if base_weigths is not None:
        # seed from the supplied base weights
        for vertex in g.vertices():
            weights.a[int(vertex)] = base_weigths.a[int(vertex)]
    if hub_penalty <= 0:
        return weights
    if hub_penalty > 1:
        raise ValueError("Invalid hub penalty {}.".format(hub_penalty))
    for edge in g.edges():
        local_mean = float(edge.source().out_degree() + edge.target().out_degree()) / 2.0
        blended = (1.0 - hub_penalty) * mean_degree + hub_penalty * local_mean
        weights[edge] = 1.0 / blended if inverse else blended
    return weights
def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
    """Approximate a Steiner tree over *g* spanning the *seeds*.

    Classic 2-approximation: build a metric-closure graph over the seeds
    (edge = shortest path in g), take its minimum spanning tree, expand the
    MST edges back into their underlying paths, take an MST of that expanded
    graph, and finally prune non-seed leaves.

    :param g: graph_tool graph with a "name" vertex property
    :param seeds: list of seed names
    :param seed_map: maps a seed name to its vertex (index) in g
    :param weights: edge weights used for shortest paths / the second MST
    :param non_zero_hub_penalty: if True, carry g's edge weights onto the
        expanded graph instead of uniform 1.0 weights
    :return: a new graph_tool Graph containing the pruned tree
    """
    node_name_attribute = "name"  # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
    # mc = metric closure over the seeds: one vertex per seed, one edge per
    # seed pair, annotated with the shortest-path length and the path itself
    mc = gt.Graph(directed=False)
    eprop_dist = mc.new_edge_property("int")
    mc.ep['dist'] = eprop_dist
    vprop_name = mc.new_vertex_property("string")
    mc.vp[node_name_attribute] = vprop_name
    eprop_path = mc.new_edge_property("object")
    mc.ep['path'] = eprop_path
    mc_vertex_map = dict()
    mc_id_map = dict()
    for i in range(len(seeds)):
        vert = mc.add_vertex()
        vprop_name[i] = seeds[i]
        mc_vertex_map[seeds[i]] = vert
        mc_id_map[vert] = i
    for u, v in it.combinations(seeds, 2):
        _, elist = gtt.shortest_path(g, g.vertex(seed_map[u]), g.vertex(seed_map[v]), weights=weights,
                                     negative_weights=False, pred_map=None, dag=False)
        e = mc.add_edge(mc_vertex_map[u], mc_vertex_map[v])
        # distance = number of edges on the shortest path
        eprop_dist[e] = len(elist)
        mc.ep.path[e] = list(elist)
    # first MST: on the metric closure, weighted by path length
    mst = gtt.min_spanning_tree(mc, weights=eprop_dist, root=None, tree_map=None)
    mc.set_edge_filter(mst)
    # g2 = expansion of the surviving closure edges into their real paths
    g2 = gt.Graph(directed=False)
    vprop_name = g2.new_vertex_property("string")
    g2.vp[node_name_attribute] = vprop_name
    g2_vertex_map = dict()
    g2_id_map = dict()
    addedNodes = set()
    for i in range(len(seeds)):
        vert = g2.add_vertex()
        vprop_name[i] = seeds[i]
        g2_vertex_map[seeds[i]] = vert
        g2_id_map[vert] = i
        addedNodes.add(seeds[i])
    allmcedges = []
    for mc_edges in mc.edges():
        path = mc.ep.path[mc_edges]
        allmcedges.extend(path)
    j = len(seeds)
    allmcedges_g2 = []
    for e in allmcedges:
        # sourceName = g.vertex_properties["name"][e.source()]
        # targetName = g.vertex_properties["name"][e.target()]
        sourceName = g.vertex_properties[node_name_attribute][e.source()]
        targetName = g.vertex_properties[node_name_attribute][e.target()]
        if sourceName not in addedNodes:
            vert = g2.add_vertex()
            vprop_name[j] = sourceName
            g2_vertex_map[sourceName] = vert
            g2_id_map[vert] = j
            addedNodes.add(sourceName)
            j += 1
        if targetName not in addedNodes:
            vert = g2.add_vertex()
            vprop_name[j] = targetName
            g2_vertex_map[targetName] = vert
            g2_id_map[vert] = j
            addedNodes.add(targetName)
            j += 1
        allmcedges_g2.append(g2.add_edge(g2_vertex_map[sourceName], g2_vertex_map[targetName]))
    weights_g2 = g2.new_edge_property("double", val=1.0)
    if non_zero_hub_penalty:
        # carry the penalized weights of the original edges onto g2
        for e, e_g2 in zip(allmcedges, allmcedges_g2):
            weights_g2[e_g2] = weights[e]
    # second MST: on the expanded path graph
    mst2 = gtt.min_spanning_tree(g2, root=None, tree_map=None, weights=weights_g2)
    g2.set_edge_filter(mst2)
    # iteratively prune leaves that are not seeds until none remain
    while True:
        noneSteinerLeaves = []
        for i in range(g2.num_vertices()):
            if g2.vertex(i).out_degree() == 1 and g2.vertex_properties[node_name_attribute][i] not in seeds:
                noneSteinerLeaves.append(i)
        if len(noneSteinerLeaves) == 0:
            break
        # remove highest indices first so lower indices stay valid
        noneSteinerLeaves = reversed(sorted(noneSteinerLeaves))
        for node in noneSteinerLeaves:
            try:
                g2.remove_edge(g2.edge(g2.vertex(node), g2.get_all_neighbors(node)[0]))
            # NOTE(review): bare except — presumably guards an already-removed
            # edge/neighbor; verify and narrow the exception type
            except:
                pass
            g2.remove_vertex(node)
    return g2
def find_bridges(g):
    r"""Return a boolean edge property marking every bridge of *g*.

    A bridge is an edge whose removal disconnects its component.  Runs a
    Tarjan-style DFS from every unvisited vertex; the recursion limit is
    raised to the vertex count since the DFS recurses once per vertex.
    """
    global __time
    __time = 0  # shared DFS discovery-time counter
    sys.setrecursionlimit(g.num_vertices() + 1)
    seen = g.new_vertex_property("boolean", False)
    discovery = g.new_vertex_property("float", float("inf"))
    low_link = g.new_vertex_property("float", float("inf"))
    dfs_parent = g.new_vertex_property("int", -1)
    bridge_map = g.new_edge_property("boolean", False)
    for start in range(g.num_vertices()):
        if seen[start]:
            continue
        __dfs_find_bridges(g, start, seen, discovery, low_link, dfs_parent, bridge_map)
    return bridge_map
def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge):
    """Recursive DFS step of the Tarjan bridge-finding algorithm.

    Assigns discovery times and low-link values; an edge (node, nb) is a
    bridge iff no back-edge from nb's subtree reaches node or above, i.e.
    ``low[nb] > disc[node]``.  Mutates the property maps in place and relies
    on the module-level ``__time`` counter set up by ``find_bridges``.
    """
    visited[node] = True
    global __time
    disc[node] = __time
    low[node] = __time
    __time += 1
    for nb in g.get_all_neighbors(node):
        if not visited[nb]:
            parent[nb] = node
            __dfs_find_bridges(g, int(nb), visited, disc, low, parent, is_bridge)
            # propagate the subtree's lowest reachable discovery time
            low[node] = min(low[node], low[nb])
            if low[nb] > disc[node]:
                # no back-edge escapes nb's subtree -> (node, nb) is a bridge
                try:
                    is_bridge[g.edge(node, nb)] = True
                # NOTE(review): bare except — presumably guards g.edge()
                # returning None for a filtered edge; verify and narrow
                except:
                    pass
        elif int(nb) != parent[node]:  # TODO can in theory be removed
            # back-edge (not to the DFS parent): update the low-link value
            low[node] = min(low[node], disc[nb])
def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
    """Multi-Steiner-tree (MuST) vertex scoring.

    Builds up to ``num_trees`` approximate Steiner trees over the seeds and
    scores every vertex by how many trees it appears in.  Alternative trees
    are generated by filtering out one non-bridge edge of the current tree at
    a time and re-running the Steiner approximation; a candidate is accepted
    when its cost is within ``tolerance`` percent of the first tree's cost.

    :param g: graph_tool graph with a "name" vertex property; its edge
        filter is temporarily modified and cleared again
    :param seed_ids: iterable of seed vertices
    :param num_trees: maximum number of Steiner trees to accumulate
    :param hub_penalty: in [0, 1]; penalizes hub-adjacent edges
    :param weights: optional base edge weights (inverted internally)
    :param tolerance: accepted cost slack in percent of the first tree
    :return: "float" vertex property with per-vertex tree counts
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    weights = edge_weights(g, weights, hub_penalty, inverse=True)
    scores = defaultdict(lambda: 0)
    node_name_attribute = 'name'
    seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids}
    seed_ids = list(seed_map.keys())
    first_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0)
    num_found_trees = 1
    tree_edges = []
    tree_nodes = set()
    # map the first tree's edges back onto g's vertices via their names
    for tree_edge in first_tree.edges():
        source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]]
        target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]]
        source_vertex = gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0]
        target_vertex = gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0]
        tree_edges.append((source_vertex, target_vertex))
        tree_nodes.add(source_vertex)
        tree_nodes.add(target_vertex)
    cost_first_tree = sum(weights[g.edge(source, target)] for source, target in tree_edges)
    # NOTE(review): returned_nodes is accumulated but never returned — verify
    # whether callers were meant to receive it
    returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
                                             match=first_tree.vertex_properties[node_name_attribute][node])[0])
                         for node in range(first_tree.num_vertices()))
    for vertex in tree_nodes:
        scores[vertex] += 1
    if num_trees > 1:
        is_bridge = find_bridges(g)
        edge_filter = g.new_edge_property("boolean", True)
        while len(tree_edges) > 0:
            tree_edge = tree_edges.pop()
            g_edge = g.edge(tree_edge[0], tree_edge[1])
            # bridges can never be removed without disconnecting the seeds
            if not is_bridge[g_edge]:
                # temporarily hide this edge and look for an alternative tree
                edge_filter[g_edge] = False
                g.set_edge_filter(edge_filter)
                next_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0)
                next_tree_edges = set()
                for next_tree_edge in next_tree.edges():
                    source_name = next_tree.vertex_properties[node_name_attribute][
                        next_tree.vertex_index[next_tree_edge.source()]]
                    target_name = next_tree.vertex_properties[node_name_attribute][
                        next_tree.vertex_index[next_tree_edge.target()]]
                    source_vertex = gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
                                                    match=source_name)[0]
                    target_vertex = gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
                                                    match=target_name)[0]
                    tree_nodes.add(source_vertex)
                    tree_nodes.add(target_vertex)
                    next_tree_edges.add((source_vertex, target_vertex))
                for vertex in tree_nodes:
                    scores[vertex] += 1
                cost_next_tree = sum(weights[g.edge(source, target)] for source, target in next_tree_edges)
                if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0):
                    # accept: cost is within tolerance of the first tree
                    num_found_trees += 1
                    for node in range(next_tree.num_vertices()):
                        returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],
                                                               match=next_tree.vertex_properties[node_name_attribute][
                                                                   node])[0]))
                # drop queued edges that the new tree does not use in either
                # orientation.
                # BUGFIX: original condition was `not (A) or (B)` which, by
                # operator precedence, also removed an edge whenever only its
                # *reversed* pair appeared in the next tree; the intended
                # reading is `not (A or B)`.
                removed_edges = [(source, target) for source, target in tree_edges
                                 if (source, target) not in next_tree_edges
                                 and (target, source) not in next_tree_edges]
                for edge in removed_edges:
                    tree_edges.remove(edge)
                g.clear_filters()
                edge_filter[g_edge] = True
            if num_found_trees >= num_trees:
                break
    score_prop = g.new_vertex_property("float")
    for v, c in scores.items():
        score_prop[int(v)] = c
    return score_prop
def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
    """Score vertices by personalized PageRank ("TrustRank") from the seeds.

    The personalization vector spreads trust 1/len(seed_ids) uniformly over
    the seeds; edges are weighted by the inverted, hub-penalized weights.

    :param g: graph_tool graph
    :param seed_ids: vertices acting as trust sources
    :param damping_factor: PageRank damping factor
    :param hub_penalty: in [0, 1]; penalizes hub-adjacent edges
    :param weights: optional base edge weights (inverted internally)
    :return: "double" vertex property with the TrustRank scores
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    edge_costs = edge_weights(g, weights, hub_penalty, inverse=True)
    # uniform personalization over the seed set
    trust = g.new_vertex_property("double")
    trust.a[[int(seed) for seed in seed_ids]] = 1.0 / len(seed_ids)
    return gtc.pagerank(g, damping=damping_factor, pers=trust, weight=edge_costs)
def betweenness(g, seed_ids, hub_penalty, weights=None):
    """Score vertices by seed-to-seed shortest-path betweenness.

    For every ordered seed pair (source < target), each interior vertex of
    each shortest path gains 1/num_paths; the per-pair contributions are
    summed into the returned property.

    :param g: graph_tool graph
    :param seed_ids: vertices between which shortest paths are traced
    :param hub_penalty: in [0, 1]; penalizes hub-adjacent edges
    :param weights: optional base edge weights (inverted internally)
    :return: "float" vertex property with the betweenness scores
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    edge_costs = edge_weights(g, weights, hub_penalty, inverse=True)
    scores = g.new_vertex_property("float")
    for source in seed_ids:
        for target in seed_ids:
            if not source < target:
                continue  # each unordered pair once, no self-pairs
            pair_scores = g.new_vertex_property("float")
            num_paths = 0.0
            for path in gtt.all_shortest_paths(g, source, target, weights=edge_costs):
                # credit only interior vertices, not the endpoints
                pair_scores.a[path[1:-1]] += 1
                num_paths += 1
            if num_paths > 0:
                pair_scores.a /= num_paths
                scores.a += pair_scores.a
    return scores
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment