From f1d418ec908093c92cc888b65431b77934f6526b Mon Sep 17 00:00:00 2001 From: AndiMajore <andi.majore@googlemail.com> Date: Fri, 10 Mar 2023 18:12:23 +0100 Subject: [PATCH] removed old dir --- cami/__init__.py | 0 cami/consensus/__init__.py | 0 cami/consensus/cami_v2.py | 60 -------- cami/consensus/cami_v3.py | 71 ---------- cami/evalutation/__init__.py | 0 cami/utils/__init__.py | 0 cami/utils/networks.py | 264 ----------------------------------- 7 files changed, 395 deletions(-) delete mode 100644 cami/__init__.py delete mode 100644 cami/consensus/__init__.py delete mode 100644 cami/consensus/cami_v2.py delete mode 100644 cami/consensus/cami_v3.py delete mode 100644 cami/evalutation/__init__.py delete mode 100644 cami/utils/__init__.py delete mode 100644 cami/utils/networks.py diff --git a/cami/__init__.py b/cami/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cami/consensus/__init__.py b/cami/consensus/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cami/consensus/cami_v2.py b/cami/consensus/cami_v2.py deleted file mode 100644 index eae38aa..0000000 --- a/cami/consensus/cami_v2.py +++ /dev/null @@ -1,60 +0,0 @@ -import sys -from collections import defaultdict -import graph_tool as gt -from utils.networks import trustrank, betweenness, must - - - -# This uses a trustrank algorithm to rank all putative nodes starting from the seeds and only accepts the top 0.X entries -# TODO maybe find a smart way to cutoff automatically? 
# This uses a trustrank algorithm to rank all putative nodes starting from the
# seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
    """Build a CAMI consensus module (v2 strategy).

    Ranks every putative node on the subnetwork induced by the seeds plus all
    tools' result genes, using one global ranking run, and accepts the top
    ``confidence_level`` fraction of putative nodes.

    :param result_sets: dict mapping tool -> set of vertices predicted by it
    :param ppi_graph: graph_tool Graph of the PPI network
    :param seed_lst: list of seed vertices
    :param predicted_by: per-vertex 0/1 vector of tool prediction codes
    :param cami_scores: unused here; kept for interface compatibility
    :param code2toolname: maps a tool-code index to the tool's name
    :param params: dict with 'damping_factor', 'hub_penalty',
        'confidence_level' and optional 'weighted', 'ranking', 'trees',
        'all_nodes', 'tolerance'
    :return: tuple (cami_vertices, putative_vertices, codes2tools)
    :raises ValueError: if params['ranking'] names an unknown method
    """
    damping_factor = params['damping_factor']
    hub_penalty = params['hub_penalty']
    confidence_level = params['confidence_level']
    weighted = params.get('weighted', False)
    ranking_method = params.get('ranking', 'trustrank')
    trees = params.get('trees', 5)
    all_nodes = params.get('all_nodes', False)
    tolerance = params.get('tolerance', 10)

    cami_vertices = set()
    putative_vertices = set()
    # Weight every candidate gene by the cumulative weight of the tools
    # that predicted it; seeds collect every tool's weight.
    counts = defaultdict(int)
    for tool in result_sets:
        # Seeds are inputs, never predictions, so drop them from each result.
        result_sets[tool] -= set(seed_lst)
        for vertex in result_sets[tool]:
            putative_vertices.add(vertex)
            counts[vertex] += tool.weight
        for vertex in seed_lst:
            counts[vertex] += tool.weight

    subnet = gt.GraphView(ppi_graph, vfilt=lambda v: v in putative_vertices or v in seed_lst)
    weights = None
    if weighted:
        # NOTE(review): counts are keyed by *vertex* but written into an
        # *edge* property array, exactly as in the original -- verify intent.
        weights = subnet.new_edge_property("double")
        for v, c in counts.items():
            weights.a[int(v)] = c

    if ranking_method == 'trustrank':
        scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
    elif ranking_method == 'betweenness':
        scores = betweenness(subnet, seed_lst, hub_penalty, weights)
    elif ranking_method == 'must':
        must_seeds = set(seed_lst).union(putative_vertices) if all_nodes else seed_lst
        # FIX: the original called must(subnet, seeds, 5, hub_penalty, weights, trees),
        # passing params['trees'] into the *tolerance* slot and hard-coding
        # num_trees=5. Pass trees/tolerance into their proper parameters,
        # matching must()'s signature and the cami_v3 call site.
        scores = must(subnet, must_seeds, trees, hub_penalty, weights, tolerance)
    else:
        # Fail fast instead of leaving `scores` unbound (NameError below).
        raise ValueError('Unknown ranking method: {}'.format(ranking_method))

    if putative_vertices:
        putative_scores = scores.a[[int(v) for v in putative_vertices]]
        putative_scores.sort()
        # Clamp so confidence_level == 0 cannot index one past the end.
        cutoff = min(int(len(putative_vertices) * (1 - confidence_level)),
                     len(putative_scores) - 1)
        threshold = putative_scores[cutoff]
        for v in putative_vertices:
            if scores[v] > threshold:
                cami_vertices.add(v)

    # Translate each vertex's 0/1 tool-code vector into tool names.
    codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1]
                   for vertex in ppi_graph.vertices()}

    return cami_vertices, putative_vertices, codes2tools
# This uses a trustrank algorithm to rank all putative nodes starting from the
# seeds and only accepts the top 0.X entries
# TODO maybe find a smart way to cutoff automatically?
def run_cami(result_sets, ppi_graph, seed_lst, predicted_by, cami_scores, code2toolname, params):
    """Build a CAMI consensus module (v3 strategy).

    Unlike v2, every tool's result set is ranked on its *own* induced
    subnetwork; per-tool scores are then summed per putative vertex and the
    top ``confidence_level`` fraction is accepted.

    :param result_sets: dict mapping tool -> set of vertices predicted by it
    :param ppi_graph: graph_tool Graph of the PPI network
    :param seed_lst: list of seed vertices
    :param predicted_by: per-vertex 0/1 vector of tool prediction codes
    :param cami_scores: unused here; kept for interface compatibility
    :param code2toolname: maps a tool-code index to the tool's name
    :param params: dict with 'damping_factor', 'hub_penalty',
        'confidence_level' and optional 'weighted', 'ranking', 'trees',
        'all_nodes', 'tolerance'
    :return: tuple (cami_vertices, putative_vertices, codes2tools)
    :raises ValueError: if params['ranking'] names an unknown method
    """
    damping_factor = params['damping_factor']
    hub_penalty = params['hub_penalty']
    confidence_level = params['confidence_level']
    weighted = params.get('weighted', False)
    ranking_method = params.get('ranking', 'trustrank')
    trees = params.get('trees', 5)
    all_nodes = params.get('all_nodes', False)
    tolerance = params.get('tolerance', 10)

    cami_vertices = set()
    putative_vertices = set()
    # Cumulative tool weight per candidate gene; seeds get every tool's weight.
    counts = defaultdict(int)
    for tool in result_sets:
        result_sets[tool] -= set(seed_lst)
        for vertex in result_sets[tool]:
            putative_vertices.add(vertex)
            counts[vertex] += tool.weight
        for vertex in seed_lst:
            counts[vertex] += tool.weight

    tool_scores = dict()
    for tool in result_sets:
        # Bind this tool's result set as a default argument: the original
        # closed over the loop variable `tool`, which is fragile if the view's
        # filter is evaluated after the loop advances (late binding).
        tool_result = result_sets[tool]
        subnet = gt.GraphView(ppi_graph,
                              vfilt=lambda v, rs=tool_result: v in rs or v in seed_lst)
        weights = None
        if weighted:
            # NOTE(review): vertex-keyed counts written into an *edge*
            # property array, exactly as in the original -- verify intent.
            weights = subnet.new_edge_property("double")
            for v, c in counts.items():
                weights.a[int(v)] = c

        if ranking_method == 'trustrank':
            scores = trustrank(subnet, seed_lst, damping_factor, hub_penalty, weights)
        elif ranking_method == 'betweenness':
            scores = betweenness(subnet, seed_lst, hub_penalty, weights)
        elif ranking_method == 'must':
            if all_nodes:
                scores = must(ppi_graph, set(seed_lst).union(putative_vertices), trees, hub_penalty, weights, tolerance)
            else:
                scores = must(subnet, seed_lst, trees, hub_penalty, weights, tolerance)
        else:
            # Fail fast instead of leaving `scores` unbound (NameError below).
            raise ValueError('Unknown ranking method: {}'.format(ranking_method))
        tool_scores[tool] = scores

    # Sum each putative vertex's score over all per-tool rankings.
    putative_score_map = defaultdict(int)
    for scores in tool_scores.values():
        for vertex_id in putative_vertices:
            try:
                putative_score_map[vertex_id] += scores.a[int(vertex_id)]
            except (IndexError, ValueError, TypeError):
                # Narrowed from the original bare `except:`: the vertex may
                # simply be absent from this tool's subnetwork score array.
                pass

    if putative_vertices:
        putative_scores = sorted(putative_score_map.values())
        # Clamp so confidence_level == 0 cannot index one past the end.
        cutoff = min(int(len(putative_vertices) * (1 - confidence_level)),
                     len(putative_scores) - 1)
        threshold = putative_scores[cutoff]
        for v in putative_vertices:
            if putative_score_map[v] > threshold:
                cami_vertices.add(v)

    # Translate each vertex's 0/1 tool-code vector into tool names.
    codes2tools = {vertex: [code2toolname[idx] for idx, code in enumerate(predicted_by[vertex]) if code == 1]
                   for vertex in ppi_graph.vertices()}

    return cami_vertices, putative_vertices, codes2tools
def edge_weights(g, base_weigths, hub_penalty, inverse=False):
    """Return a per-edge "double" weight map for *g*.

    Every edge starts at the graph's average total degree. If *base_weigths*
    is given, its values are copied in first. A ``hub_penalty`` in (0, 1]
    blends the average degree with each edge's endpoint-degree average, so
    edges touching hubs get heavier (or, with ``inverse=True``, lighter)
    weights.

    :raises ValueError: if hub_penalty > 1
    """
    avdeg = gts.vertex_average(g, "total")[0]
    weights = g.new_edge_property("double", val=avdeg)
    if base_weigths is not None:
        # NOTE(review): vertex-indexed copy into an *edge* property array,
        # reproduced exactly from the original -- verify intent.
        for v in g.vertices():
            weights.a[int(v)] = base_weigths.a[int(v)]
    if hub_penalty <= 0:
        return weights
    if hub_penalty > 1:
        raise ValueError("Invalid hub penalty {}.".format(hub_penalty))
    for e in g.edges():
        edge_avdeg = float(e.source().out_degree() + e.target().out_degree()) / 2.0
        penalized_weight = (1.0 - hub_penalty) * avdeg + hub_penalty * edge_avdeg
        weights[e] = 1.0 / penalized_weight if inverse else penalized_weight
    return weights


def steiner_tree(g, seeds, seed_map, weights, non_zero_hub_penalty):
    """Approximate a Steiner tree over *seeds* in *g*.

    Classic metric-closure approximation: build the complete graph on the
    seeds with shortest-path distances, take its minimum spanning tree,
    expand every MST edge back into its underlying path, take an MST of that
    expansion, and finally prune non-seed leaves.

    :param seeds: list of seed names
    :param seed_map: maps seed name -> vertex index in g
    :param weights: edge weight map used for the shortest paths
    :param non_zero_hub_penalty: if truthy, reuse *weights* on the expansion
    :return: a new graph_tool Graph containing the approximate Steiner tree
    """
    # Nodes in the input network (from RepoTrialDB) carry primaryDomainId
    # in the "name" vertex property.
    node_name_attribute = "name"

    # --- metric closure over the seeds -------------------------------------
    closure = gt.Graph(directed=False)
    eprop_dist = closure.new_edge_property("int")
    closure.ep['dist'] = eprop_dist
    vprop_name = closure.new_vertex_property("string")
    closure.vp[node_name_attribute] = vprop_name
    eprop_path = closure.new_edge_property("object")
    closure.ep['path'] = eprop_path

    closure_vertex_map = dict()
    closure_id_map = dict()
    for idx, seed in enumerate(seeds):
        v_new = closure.add_vertex()
        vprop_name[idx] = seed
        closure_vertex_map[seed] = v_new
        closure_id_map[v_new] = idx

    # One closure edge per seed pair; remember the real path behind it.
    for u, v in it.combinations(seeds, 2):
        _, elist = gtt.shortest_path(g, g.vertex(seed_map[u]), g.vertex(seed_map[v]), weights=weights,
                                     negative_weights=False, pred_map=None, dag=False)
        e = closure.add_edge(closure_vertex_map[u], closure_vertex_map[v])
        eprop_dist[e] = len(elist)
        closure.ep.path[e] = list(elist)

    mst = gtt.min_spanning_tree(closure, weights=eprop_dist, root=None, tree_map=None)
    closure.set_edge_filter(mst)

    # --- expand MST edges back into real paths -----------------------------
    expansion = gt.Graph(directed=False)
    vprop_name = expansion.new_vertex_property("string")
    expansion.vp[node_name_attribute] = vprop_name

    expansion_vertex_map = dict()
    expansion_id_map = dict()
    added_nodes = set()
    for idx, seed in enumerate(seeds):
        v_new = expansion.add_vertex()
        vprop_name[idx] = seed
        expansion_vertex_map[seed] = v_new
        expansion_id_map[v_new] = idx
        added_nodes.add(seed)

    all_closure_edges = []
    for closure_edge in closure.edges():
        all_closure_edges.extend(closure.ep.path[closure_edge])

    next_id = len(seeds)
    expansion_edges = []
    for e in all_closure_edges:
        source_name = g.vertex_properties[node_name_attribute][e.source()]
        target_name = g.vertex_properties[node_name_attribute][e.target()]
        for endpoint_name in (source_name, target_name):
            if endpoint_name not in added_nodes:
                v_new = expansion.add_vertex()
                vprop_name[next_id] = endpoint_name
                expansion_vertex_map[endpoint_name] = v_new
                expansion_id_map[v_new] = next_id
                added_nodes.add(endpoint_name)
                next_id += 1
        expansion_edges.append(expansion.add_edge(expansion_vertex_map[source_name],
                                                  expansion_vertex_map[target_name]))

    weights_expansion = expansion.new_edge_property("double", val=1.0)
    if non_zero_hub_penalty:
        # Carry the penalised weights over so the second MST respects them.
        for e, e_exp in zip(all_closure_edges, expansion_edges):
            weights_expansion[e_exp] = weights[e]
    mst2 = gtt.min_spanning_tree(expansion, root=None, tree_map=None, weights=weights_expansion)
    expansion.set_edge_filter(mst2)

    # --- iteratively prune non-seed leaves ---------------------------------
    while True:
        non_steiner_leaves = [i for i in range(expansion.num_vertices())
                              if expansion.vertex(i).out_degree() == 1
                              and expansion.vertex_properties[node_name_attribute][i] not in seeds]
        if not non_steiner_leaves:
            break
        # Remove from the highest index down so earlier indices stay valid.
        for node in sorted(non_steiner_leaves, reverse=True):
            try:
                expansion.remove_edge(expansion.edge(expansion.vertex(node),
                                                     expansion.get_all_neighbors(node)[0]))
            except:  # noqa: E722 -- kept from the original; edge may be gone
                pass
            expansion.remove_vertex(node)

    return expansion


def find_bridges(g):
    r"""Finds all bridges in a graph.

    Returns a boolean edge property map marking bridge edges, via a
    recursive DFS (Tarjan's low-link technique).
    """
    global __time
    __time = 0
    # DFS recurses once per vertex in the worst case.
    sys.setrecursionlimit(g.num_vertices() + 1)
    visited = g.new_vertex_property("boolean", False)
    disc = g.new_vertex_property("float", float("inf"))
    low = g.new_vertex_property("float", float("inf"))
    parent = g.new_vertex_property("int", -1)
    is_bridge = g.new_edge_property("boolean", False)
    for node in range(g.num_vertices()):
        if not visited[node]:
            __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge)
    return is_bridge


def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge):
    """DFS helper for :func:`find_bridges`; updates low-link values and
    marks an edge as a bridge when no back-edge spans it."""
    visited[node] = True
    global __time
    disc[node] = __time
    low[node] = __time
    __time += 1

    for nb in g.get_all_neighbors(node):
        if not visited[nb]:
            parent[nb] = node
            __dfs_find_bridges(g, int(nb), visited, disc, low, parent, is_bridge)
            low[node] = min(low[node], low[nb])
            if low[nb] > disc[node]:
                # No back-edge from nb's subtree reaches node or above:
                # (node, nb) is a bridge.
                try:
                    is_bridge[g.edge(node, nb)] = True
                except:  # noqa: E722 -- kept from the original
                    pass
        elif int(nb) != parent[node]:  # TODO can in theory be removed
            low[node] = min(low[node], disc[nb])
def must(g, seed_ids, num_trees, hub_penalty, weights=None, tolerance=10):
    """Multi-Steiner-tree (MuST-style) node scoring.

    Builds an approximate Steiner tree over the seeds, then repeatedly
    disables one non-bridge tree edge and rebuilds, accepting each new tree
    whose cost is within ``tolerance`` percent of the first tree's cost,
    until ``num_trees`` trees were found or no tree edge is left. Each
    vertex's score counts the trees it appeared in.

    :param g: graph_tool Graph with a "name" vertex property
    :param seed_ids: seed vertices of g
    :param num_trees: maximum number of accepted Steiner trees
    :param hub_penalty: passed through to :func:`edge_weights`
    :param weights: optional base weights for :func:`edge_weights`
    :param tolerance: accepted cost slack in percent of the first tree's cost
    :return: a "float" vertex property map of per-vertex tree counts
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    weights = edge_weights(g, weights, hub_penalty, inverse=True)
    scores = defaultdict(int)
    node_name_attribute = 'name'
    name_prop = g.vertex_properties[node_name_attribute]
    seed_map = {name_prop[node]: node for node in seed_ids}
    seed_ids = list(seed_map.keys())

    first_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0)
    num_found_trees = 1
    tree_edges = []
    tree_nodes = set()
    for tree_edge in first_tree.edges():
        source_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.source()]]
        target_name = first_tree.vertex_properties[node_name_attribute][first_tree.vertex_index[tree_edge.target()]]
        source_vertex = gtu.find_vertex(g, prop=name_prop, match=source_name)[0]
        target_vertex = gtu.find_vertex(g, prop=name_prop, match=target_name)[0]
        tree_edges.append((source_vertex, target_vertex))
        tree_nodes.add(source_vertex)
        tree_nodes.add(target_vertex)
    cost_first_tree = sum(weights[g.edge(source, target)] for source, target in tree_edges)
    # NOTE: the original also built a `returned_nodes` set here that was
    # never read afterwards; it has been dropped.
    for vertex in tree_nodes:
        scores[vertex] += 1

    if num_trees > 1:
        is_bridge = find_bridges(g)
        edge_filter = g.new_edge_property("boolean", True)
        while tree_edges:
            tree_edge = tree_edges.pop()
            g_edge = g.edge(tree_edge[0], tree_edge[1])
            # Disabling a bridge would disconnect the seeds, so skip those.
            if not is_bridge[g_edge]:
                edge_filter[g_edge] = False
                g.set_edge_filter(edge_filter)
                next_tree = steiner_tree(g, seed_ids, seed_map, weights, hub_penalty > 0)
                next_tree_edges = set()
                for next_tree_edge in next_tree.edges():
                    source_name = next_tree.vertex_properties[node_name_attribute][
                        next_tree.vertex_index[next_tree_edge.source()]]
                    target_name = next_tree.vertex_properties[node_name_attribute][
                        next_tree.vertex_index[next_tree_edge.target()]]
                    source_vertex = gtu.find_vertex(g, prop=name_prop, match=source_name)[0]
                    target_vertex = gtu.find_vertex(g, prop=name_prop, match=target_name)[0]
                    tree_nodes.add(source_vertex)
                    tree_nodes.add(target_vertex)
                    next_tree_edges.add((source_vertex, target_vertex))
                for vertex in tree_nodes:
                    scores[vertex] += 1
                cost_next_tree = sum(weights[g.edge(source, target)] for source, target in next_tree_edges)
                if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0):
                    num_found_trees += 1
                    # FIX: the original condition read
                    #   not (A in S) or (B in S)
                    # because `not` binds tighter than `or`; an old edge that
                    # appears *reversed* in the new tree was wrongly removed.
                    # Intended: drop only edges absent in both orientations.
                    # (Also: next_tree_edges is already a set; no need to
                    # rebuild `set(...)` on every membership test.)
                    removed_edges = [(source, target) for source, target in tree_edges
                                     if (source, target) not in next_tree_edges
                                     and (target, source) not in next_tree_edges]
                    for edge in removed_edges:
                        tree_edges.remove(edge)
                # Restore the full graph before the next candidate edge.
                g.clear_filters()
                edge_filter[g_edge] = True
            if num_found_trees >= num_trees:
                break

    score_prop = g.new_vertex_property("float")
    for v, c in scores.items():
        score_prop[int(v)] = c
    return score_prop


def trustrank(g, seed_ids, damping_factor, hub_penalty, weights=None):
    """Personalized-PageRank ("TrustRank") scores seeded on *seed_ids*.

    The personalization vector puts uniform mass 1/len(seed_ids) on every
    seed; edge weights come from :func:`edge_weights` (inverse, so heavier
    hub edges carry less trust).

    :return: vertex property map of PageRank scores
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    weights = edge_weights(g, weights, hub_penalty, inverse=True)
    # Call graph-tool to compute TrustRank.
    trust = g.new_vertex_property("double")
    trust.a[[int(v) for v in seed_ids]] = 1.0 / len(seed_ids)
    scores = gtc.pagerank(g, damping=damping_factor, pers=trust, weight=weights)
    return scores


def betweenness(g, seed_ids, hub_penalty, weights=None):
    """Seed-restricted betweenness: for each unordered seed pair, every
    interior vertex of each shortest path gets 1/num_paths; scores are the
    sum over all pairs.

    :return: vertex property map of betweenness-like scores
    """
    if gt.openmp_enabled():
        gt.openmp_set_num_threads(6)
    weights = edge_weights(g, weights, hub_penalty, inverse=True)
    scores = g.new_vertex_property("float")
    # Unordered pairs only (source < target) to avoid double counting.
    all_pairs = [(source, target) for source in seed_ids for target in seed_ids if source < target]
    for source, target in all_pairs:
        local_scores = g.new_vertex_property("float")
        num_paths = 0.0
        for path in gtt.all_shortest_paths(g, source, target, weights=weights):
            # Credit interior vertices only, not the endpoints.
            local_scores.a[path[1:-1]] += 1
            num_paths += 1
        if num_paths > 0:
            local_scores.a /= num_paths
        scores.a += local_scores.a
    return scores