From 5bd6318e52ce10ac9b2751eee2014307e9b2947b Mon Sep 17 00:00:00 2001 From: AndiMajore <andi.majore@googlemail.com> Date: Thu, 14 Jul 2022 16:40:18 +0200 Subject: [PATCH] data and package update Former-commit-id: 332e23fc766dc301c75887f71e8006c04accfc09 --- Dockerfile | 11 ++++------- scripts/docker-entrypoint.sh | 2 +- tasks/multi_steiner.py | 17 +++++++++++++++-- tasks/util/find_bridges.py | 11 +++++++---- tasks/util/read_graph_tool_graph.py | 20 ++++++++++++++++---- 5 files changed, 43 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5bafbf3..42a7354 100755 --- a/Dockerfile +++ b/Dockerfile @@ -12,18 +12,15 @@ RUN apt-get install -y supervisor nginx RUN apt-get install -y libgtk-3-dev RUN apt-get install wget -COPY ./requirements.txt /usr/src/drugstone/requirements.txt +RUN conda install -y conda python=3.8 +RUN conda install -c conda-forge -y graph-tool=2.45 -RUN conda install -y conda=4.3.16 -RUN conda install -c conda-forge -y graph-tool=2.32 +RUN pip install gunicorn -RUN pip install pyvcf +COPY ./requirements.txt /usr/src/drugstone/requirements.txt RUN pip install -r /usr/src/drugstone/requirements.txt -RUN pip install gunicorn COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf -#COPY scripts/docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh -# COPY ./scripts/ /usr/src/drugstone/scripts/ COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/ RUN pip install /usr/src/drugstone/python_nedrex/ diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index 43bf32d..3c90d86 100755 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone python3 manage.py migrate python3 manage.py createfixtures python3 manage.py cleanuptasks -python3 manage.py populate_db --update -a +#python3 manage.py populate_db --update -a python3 manage.py make_graphs /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py index affaac1..30a8adb 100755 --- a/tasks/multi_steiner.py +++ b/tasks/multi_steiner.py @@ -127,39 +127,52 @@ def multi_steiner(task_hook: TaskHook): tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])) cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges]) # returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=first_tree.vertex_properties["name"][node])[0]) for node in range(first_tree.num_vertices())) + print(f"Before gtu: Costs={cost_first_tree}") returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices())) - - + print(f"After gtu: {returned_nodes}") + print(num_trees) if num_trees > 1: + print("num_trees > 1") is_bridge = find_bridges(g) + print("found bridges") edge_filter = g.new_edge_property("boolean", True) + print("filtered edges") found_new_tree = True while len(tree_edges) > 0: + print(f"Tree edges length: {len(tree_edges)}") if found_new_tree: task_hook.set_progress(float(num_found_trees + 2) / (float(num_trees + 3)), "Computing Steiner tree {} of {}.".format(num_found_trees + 1, num_trees)) found_new_tree = False tree_edge = tree_edges.pop() + print("1") g_edge = g.edge(tree_edge[0], tree_edge[1]) if not is_bridge[g_edge]: + print("2") edge_filter[g_edge] = False g.set_edge_filter(edge_filter) next_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0) + print("3") next_tree_edges = set() for next_tree_edge in next_tree.edges(): # source_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.source()]] # target_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.target()]] # next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0])) + print("4") source_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.source()]] target_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.target()]] next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])) cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges]) if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0): + print("5") found_new_tree = True num_found_trees += 1 for node in range(next_tree.num_vertices()): + print("GTU again") # returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=next_tree.vertex_properties["name"][node])[0])) returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],match=next_tree.vertex_properties[node_name_attribute][node])[0])) + print("GTU done") removed_edges = [] + print("6") for source, target in tree_edges: if not ((source, target) in set(next_tree_edges)) or ((target, source) in set(next_tree_edges)): removed_edges.append((source, target)) diff --git a/tasks/util/find_bridges.py b/tasks/util/find_bridges.py index ad9cc16..6fc36b6 100755 --- a/tasks/util/find_bridges.py +++ b/tasks/util/find_bridges.py @@ -4,13 +4,16 @@ __time = 0 def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): - + print("Dfs_find_bridges") visited[node] = True global __time disc[node] = __time low[node] = __time __time += 1 - + + print(f"Checking neighbors of {g.vertex(node)}") + print(f"Degree = {g.vertex(node).out_degree()}") + for nb in g.get_all_neighbors(node): if not visited[nb]: parent[nb] = node @@ -18,12 +21,12 @@ def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): low[node] = min(low[node], low[nb]) if low[nb] > disc[node]: is_bridge[g.edge(node, nb)] = True - elif int(nb) != parent[node]: + elif int(nb) != parent[node]: #TODO can in theory be removed because low[node] = min(low[node], disc[nb]) def find_bridges(g): r"""Finds all bridges in a graph.""" - + print("Finding bridges") global __time __time = 0 sys.setrecursionlimit(g.num_vertices() + 1) diff --git a/tasks/util/read_graph_tool_graph.py b/tasks/util/read_graph_tool_graph.py index bf26636..fb5b3aa 100755 --- a/tasks/util/read_graph_tool_graph.py +++ b/tasks/util/read_graph_tool_graph.py @@ -79,13 +79,25 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals # Check that all seed seeds have been matched and throw error, otherwise. # print(deleted_nodes) - print(seed_ids) - for protein, found in is_matched.items(): - if not found: - raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path)) + # print(seed_ids) + # for protein, found in is_matched.items(): + # if not found: + # raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path)) # Delete edges that should be ignored or are not contained in the selected dataset. deleted_edges = [] + + for edge in g.edges(): + if edge.source == edge.target: + deleted_edges.append(edge) + + g.set_fast_edge_removal(fast=True) + for edge in deleted_edges: + g.remove_edge(edge) + g.set_fast_edge_removal(fast=False) + + deleted_edges = [] + if (drug_ids and not include_indirect_drugs): # If only_direct_drugs should be included, remove any drug-protein edges that the drug is not a direct neighbor of any seeds direct_drugs = set() for edge in g.edges(): -- GitLab