diff --git a/Dockerfile b/Dockerfile index 5bafbf3c8633f19057ec65c128d44827a141e4c6..42a73544f9646b0855fcefed06b3cba280f78f7c 100755 --- a/Dockerfile +++ b/Dockerfile @@ -12,18 +12,15 @@ RUN apt-get install -y supervisor nginx RUN apt-get install -y libgtk-3-dev RUN apt-get install wget -COPY ./requirements.txt /usr/src/drugstone/requirements.txt +RUN conda install -y conda python=3.8 +RUN conda install -c conda-forge -y graph-tool=2.45 -RUN conda install -y conda=4.3.16 -RUN conda install -c conda-forge -y graph-tool=2.32 +RUN pip install gunicorn -RUN pip install pyvcf +COPY ./requirements.txt /usr/src/drugstone/requirements.txt RUN pip install -r /usr/src/drugstone/requirements.txt -RUN pip install gunicorn COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf -#COPY scripts/docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh -# COPY ./scripts/ /usr/src/drugstone/scripts/ COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/ RUN pip install /usr/src/drugstone/python_nedrex/ diff --git a/data/Networks/internal_APID_ChEMBL.gt b/data/Networks/internal_APID_ChEMBL.gt index 85d47348f15edc5ee92c9df20844f90699313e29..eb81c7636be08c7d8ae62aa3cf7739858f339f21 100755 Binary files a/data/Networks/internal_APID_ChEMBL.gt and b/data/Networks/internal_APID_ChEMBL.gt differ diff --git a/data/Networks/internal_APID_DGIdb.gt b/data/Networks/internal_APID_DGIdb.gt index 1a4fad379b4e2b0927e3620346e7698793f4235c..67c1286afa3bccc5fe4a3fc506970fee8f216f02 100755 Binary files a/data/Networks/internal_APID_DGIdb.gt and b/data/Networks/internal_APID_DGIdb.gt differ diff --git a/data/Networks/internal_APID_Drug Central.gt b/data/Networks/internal_APID_Drug Central.gt index c1e3617115888ace9930287714a85bce47518d8d..315e4955be1731e1be4c728235095b3908a71658 100644 Binary files a/data/Networks/internal_APID_Drug Central.gt and b/data/Networks/internal_APID_Drug Central.gt differ diff --git a/data/Networks/internal_APID_DrugBank.gt b/data/Networks/internal_APID_DrugBank.gt index e4632a1c938f23ac9ba730621d1af38346105173..315e4955be1731e1be4c728235095b3908a71658 100755 Binary files a/data/Networks/internal_APID_DrugBank.gt and b/data/Networks/internal_APID_DrugBank.gt differ diff --git a/data/Networks/internal_APID_NeDRex.gt b/data/Networks/internal_APID_NeDRex.gt index 24619c628ba6ae9a59a8afeb55e5c872e9ba9da0..5a18533dc808e6719a9ed581b20fe72ed07aae03 100644 Binary files a/data/Networks/internal_APID_NeDRex.gt and b/data/Networks/internal_APID_NeDRex.gt differ diff --git a/data/Networks/internal_BioGRID_ChEMBL.gt b/data/Networks/internal_BioGRID_ChEMBL.gt index bc6e846132feef345893410824dbc3002777c842..621a57df0588f45ef80d388b2aa296928182d85d 100755 Binary files a/data/Networks/internal_BioGRID_ChEMBL.gt and b/data/Networks/internal_BioGRID_ChEMBL.gt differ diff --git a/data/Networks/internal_BioGRID_DGIdb.gt b/data/Networks/internal_BioGRID_DGIdb.gt index 3a5214ee7f2869e02462d3e1e9e548a0c655f2c3..db2c7bf42000445d3440a0a8ca6ba005def8d657 100755 Binary files a/data/Networks/internal_BioGRID_DGIdb.gt and b/data/Networks/internal_BioGRID_DGIdb.gt differ diff --git a/data/Networks/internal_BioGRID_Drug Central.gt b/data/Networks/internal_BioGRID_Drug Central.gt index ab867b5bd7fa34c4f292d64a5b55c37c306a842c..92a22559c4030cba43c742f5453265fd1e8706ef 100644 Binary files a/data/Networks/internal_BioGRID_Drug Central.gt and b/data/Networks/internal_BioGRID_Drug Central.gt differ diff --git a/data/Networks/internal_BioGRID_DrugBank.gt b/data/Networks/internal_BioGRID_DrugBank.gt index 741fd267869b7eae62605127e8acd3b33e088ee7..92a22559c4030cba43c742f5453265fd1e8706ef 100755 Binary files a/data/Networks/internal_BioGRID_DrugBank.gt and b/data/Networks/internal_BioGRID_DrugBank.gt differ diff --git a/data/Networks/internal_BioGRID_NeDRex.gt b/data/Networks/internal_BioGRID_NeDRex.gt index d3b1654422adde22db4ac02bb1bd8a3488bc05a7..00546a935b798f59c8c59de615645005cd47c8b0 100644 Binary files a/data/Networks/internal_BioGRID_NeDRex.gt and b/data/Networks/internal_BioGRID_NeDRex.gt differ diff --git a/data/Networks/internal_IID_ChEMBL.gt b/data/Networks/internal_IID_ChEMBL.gt index 2e480c3bdb8b7fbcd244806d190f3b689e38d030..621a57df0588f45ef80d388b2aa296928182d85d 100644 Binary files a/data/Networks/internal_IID_ChEMBL.gt and b/data/Networks/internal_IID_ChEMBL.gt differ diff --git a/data/Networks/internal_IID_DGIdb.gt b/data/Networks/internal_IID_DGIdb.gt index fbbabb245a83a905b14918d74351f3d5de4e3ba4..db2c7bf42000445d3440a0a8ca6ba005def8d657 100644 Binary files a/data/Networks/internal_IID_DGIdb.gt and b/data/Networks/internal_IID_DGIdb.gt differ diff --git a/data/Networks/internal_IID_Drug Central.gt b/data/Networks/internal_IID_Drug Central.gt index aa74f9144ccb429809ff2d1dd2e64b36167cd22d..92a22559c4030cba43c742f5453265fd1e8706ef 100644 Binary files a/data/Networks/internal_IID_Drug Central.gt and b/data/Networks/internal_IID_Drug Central.gt differ diff --git a/data/Networks/internal_IID_DrugBank.gt b/data/Networks/internal_IID_DrugBank.gt index 7ce4e6840cceeeb531c25ebb6d907de3d5314faa..92a22559c4030cba43c742f5453265fd1e8706ef 100644 Binary files a/data/Networks/internal_IID_DrugBank.gt and b/data/Networks/internal_IID_DrugBank.gt differ diff --git a/data/Networks/internal_IID_NeDRex.gt b/data/Networks/internal_IID_NeDRex.gt index 52d8e78be6ce253096507d2033411a8ccb825eb7..00546a935b798f59c8c59de615645005cd47c8b0 100644 Binary files a/data/Networks/internal_IID_NeDRex.gt and b/data/Networks/internal_IID_NeDRex.gt differ diff --git a/data/Networks/internal_IntAct_ChEMBL.gt b/data/Networks/internal_IntAct_ChEMBL.gt index 59e3b73f077c700b2e50fc36c8c6d1361a8d3130..621a57df0588f45ef80d388b2aa296928182d85d 100644 Binary files a/data/Networks/internal_IntAct_ChEMBL.gt and b/data/Networks/internal_IntAct_ChEMBL.gt differ diff --git a/data/Networks/internal_IntAct_DGIdb.gt b/data/Networks/internal_IntAct_DGIdb.gt index 1a66d9c8eee7c190b887f74e17a9f98461b34c9f..db2c7bf42000445d3440a0a8ca6ba005def8d657 100644 Binary files a/data/Networks/internal_IntAct_DGIdb.gt and b/data/Networks/internal_IntAct_DGIdb.gt differ diff --git a/data/Networks/internal_IntAct_Drug Central.gt b/data/Networks/internal_IntAct_Drug Central.gt index 0925d46516f61b305c0f122659d9f0b1a651e86e..92a22559c4030cba43c742f5453265fd1e8706ef 100644 Binary files a/data/Networks/internal_IntAct_Drug Central.gt and b/data/Networks/internal_IntAct_Drug Central.gt differ diff --git a/data/Networks/internal_IntAct_DrugBank.gt b/data/Networks/internal_IntAct_DrugBank.gt index 9acd625091d53c6accf89af908fb111778d00aee..92a22559c4030cba43c742f5453265fd1e8706ef 100644 Binary files a/data/Networks/internal_IntAct_DrugBank.gt and b/data/Networks/internal_IntAct_DrugBank.gt differ diff --git a/data/Networks/internal_IntAct_NeDRex.gt b/data/Networks/internal_IntAct_NeDRex.gt index 7a90f6fa33f73278544ee3ede72b548ae5e957fa..00546a935b798f59c8c59de615645005cd47c8b0 100644 Binary files a/data/Networks/internal_IntAct_NeDRex.gt and b/data/Networks/internal_IntAct_NeDRex.gt differ diff --git a/data/Networks/internal_NeDRex_ChEMBL.gt b/data/Networks/internal_NeDRex_ChEMBL.gt index 7686c32639dc8ef2dba2519b4c08ed4ad9d57db6..70e113f7d74c7db2b5d95ef806273634a8351d4a 100644 Binary files a/data/Networks/internal_NeDRex_ChEMBL.gt and b/data/Networks/internal_NeDRex_ChEMBL.gt differ diff --git a/data/Networks/internal_NeDRex_DGIdb.gt b/data/Networks/internal_NeDRex_DGIdb.gt index 333655f5a1b905a0505c3a3b5a28b9f2f8f8ba6c..61601dae99ae3861f14162f8d1a86e25c85271e7 100644 Binary files a/data/Networks/internal_NeDRex_DGIdb.gt and b/data/Networks/internal_NeDRex_DGIdb.gt differ diff --git a/data/Networks/internal_NeDRex_Drug Central.gt b/data/Networks/internal_NeDRex_Drug Central.gt index fa3e3e4aa0eed81be536bbafb09e23f1e003a45c..3768d14e608f3157e0fb0777e2a2b69f0390c975 100644 Binary files a/data/Networks/internal_NeDRex_Drug Central.gt and b/data/Networks/internal_NeDRex_Drug Central.gt differ diff --git a/data/Networks/internal_NeDRex_DrugBank.gt b/data/Networks/internal_NeDRex_DrugBank.gt index 6e264703a8b52032571d688b7c76b62ec5ad7ac9..3768d14e608f3157e0fb0777e2a2b69f0390c975 100644 Binary files a/data/Networks/internal_NeDRex_DrugBank.gt and b/data/Networks/internal_NeDRex_DrugBank.gt differ diff --git a/data/Networks/internal_NeDRex_NeDRex.gt b/data/Networks/internal_NeDRex_NeDRex.gt index ec1fcff571d9adcbbcdb184fd0e4d9c6a337db6e..4593ac3fed31c14105876f5fe04d57cf2e0be57b 100644 Binary files a/data/Networks/internal_NeDRex_NeDRex.gt and b/data/Networks/internal_NeDRex_NeDRex.gt differ diff --git a/data/Networks/internal_STRING_ChEMBL.gt b/data/Networks/internal_STRING_ChEMBL.gt index 8b2dc1bfc15dbfbc0618b365c7137fb218726b50..0a7e63a0a8d0bc97bf53e86bfeeebfd0b846e47c 100755 Binary files a/data/Networks/internal_STRING_ChEMBL.gt and b/data/Networks/internal_STRING_ChEMBL.gt differ diff --git a/data/Networks/internal_STRING_DGIdb.gt b/data/Networks/internal_STRING_DGIdb.gt index a23445193ece6049ba9db6dfe77dbdf5d30bccb3..6439136cc86b91c1a4674ffc9f483c8a71635d1b 100755 Binary files a/data/Networks/internal_STRING_DGIdb.gt and b/data/Networks/internal_STRING_DGIdb.gt differ diff --git a/data/Networks/internal_STRING_Drug Central.gt b/data/Networks/internal_STRING_Drug Central.gt index f7d7186c14933f2381ce06cf699c7704f72bc394..f1e3cf40ce9c394e051cfbaee6a5a207aca454bd 100644 Binary files a/data/Networks/internal_STRING_Drug Central.gt and b/data/Networks/internal_STRING_Drug Central.gt differ diff --git a/data/Networks/internal_STRING_DrugBank.gt b/data/Networks/internal_STRING_DrugBank.gt index 25134478b90acd7d5516b36ef71ace114f1ffb32..f1e3cf40ce9c394e051cfbaee6a5a207aca454bd 100755 Binary files a/data/Networks/internal_STRING_DrugBank.gt and b/data/Networks/internal_STRING_DrugBank.gt differ diff --git a/data/Networks/internal_STRING_NeDRex.gt b/data/Networks/internal_STRING_NeDRex.gt index 5d92b648459d22882b813d780af1381287aa3b3c..88721e5f47bc535bfb9cff673b1cebd2211d01c9 100644 Binary files a/data/Networks/internal_STRING_NeDRex.gt and b/data/Networks/internal_STRING_NeDRex.gt differ diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index 43bf32df3da5898d25d96f8abd734edf4872ea3c..3c90d86f54ec7daebdfb9694939d1038ed24857c 100755 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone python3 manage.py migrate python3 manage.py createfixtures python3 manage.py cleanuptasks -python3 manage.py populate_db --update -a +#python3 manage.py populate_db --update -a python3 manage.py make_graphs /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py index affaac189447a1843c63aeaf439c5c3ec7e56586..30a8adb887a4814d98a3b0363ad758be61d8a818 100755 --- a/tasks/multi_steiner.py +++ b/tasks/multi_steiner.py @@ -127,39 +127,52 @@ def multi_steiner(task_hook: TaskHook): tree_edges.append((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])) cost_first_tree = sum([weights[g.edge(source, target)] for source, target in tree_edges]) # returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=first_tree.vertex_properties["name"][node])[0]) for node in range(first_tree.num_vertices())) + print(f"Before gtu: Costs={cost_first_tree}") returned_nodes = set(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=first_tree.vertex_properties[node_name_attribute][node])[0]) for node in range(first_tree.num_vertices())) - - + print(f"After gtu: {returned_nodes}") + print(num_trees) if num_trees > 1: + print("num_trees > 1") is_bridge = find_bridges(g) + print("found bridges") edge_filter = g.new_edge_property("boolean", True) + print("filtered edges") found_new_tree = True while len(tree_edges) > 0: + print(f"Tree edges length: {len(tree_edges)}") if found_new_tree: task_hook.set_progress(float(num_found_trees + 2) / (float(num_trees + 3)), "Computing Steiner tree {} of {}.".format(num_found_trees + 1, num_trees)) found_new_tree = False tree_edge = tree_edges.pop() + print("1") g_edge = g.edge(tree_edge[0], tree_edge[1]) if not is_bridge[g_edge]: + print("2") edge_filter[g_edge] = False g.set_edge_filter(edge_filter) next_tree = steiner_tree(g, seeds, seed_map, weights, hub_penalty > 0) + print("3") next_tree_edges = set() for next_tree_edge in next_tree.edges(): # source_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.source()]] # target_name = next_tree.vertex_properties["name"][next_tree.vertex_index[next_tree_edge.target()]] # next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties['name'], match=source_name)[0], gtu.find_vertex(g, prop=g.vertex_properties['name'], match=target_name)[0])) + print("4") source_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.source()]] target_name = next_tree.vertex_properties[node_name_attribute][next_tree.vertex_index[next_tree_edge.target()]] next_tree_edges.add((gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=source_name)[0],gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute], match=target_name)[0])) cost_next_tree = sum([weights[g.edge(source, target)] for source, target in next_tree_edges]) if cost_next_tree <= cost_first_tree * ((100.0 + tolerance) / 100.0): + print("5") found_new_tree = True num_found_trees += 1 for node in range(next_tree.num_vertices()): + print("GTU again") # returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties['name'], match=next_tree.vertex_properties["name"][node])[0])) returned_nodes.add(int(gtu.find_vertex(g, prop=g.vertex_properties[node_name_attribute],match=next_tree.vertex_properties[node_name_attribute][node])[0])) + print("GTU done") removed_edges = [] + print("6") for source, target in tree_edges: if not ((source, target) in set(next_tree_edges)) or ((target, source) in set(next_tree_edges)): removed_edges.append((source, target)) diff --git a/tasks/util/find_bridges.py b/tasks/util/find_bridges.py index ad9cc16659ad18bc59f5a5a3be0b723c0fddef32..6fc36b665e97b4d9074501535ae885ef24054f0e 100755 --- a/tasks/util/find_bridges.py +++ b/tasks/util/find_bridges.py @@ -4,13 +4,16 @@ __time = 0 def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): - + print("Dfs_find_bridges") visited[node] = True global __time disc[node] = __time low[node] = __time __time += 1 - + + print(f"Checking neighbors of {g.vertex(node)}") + print(f"Degree = {g.vertex(node).out_degree()}") + for nb in g.get_all_neighbors(node): if not visited[nb]: parent[nb] = node @@ -18,12 +21,12 @@ def __dfs_find_bridges(g, node, visited, disc, low, parent, is_bridge): low[node] = min(low[node], low[nb]) if low[nb] > disc[node]: is_bridge[g.edge(node, nb)] = True - elif int(nb) != parent[node]: + elif int(nb) != parent[node]: #TODO can in theory be removed because low[node] = min(low[node], disc[nb]) def find_bridges(g): r"""Finds all bridges in a graph.""" - + print("Finding bridges") global __time __time = 0 sys.setrecursionlimit(g.num_vertices() + 1) diff --git a/tasks/util/read_graph_tool_graph.py b/tasks/util/read_graph_tool_graph.py index bf26636c266a7d8a8ea96eb93c136f4fe70fde0c..fb5b3aad20bd2dafde7a268a847cce601d5d4596 100755 --- a/tasks/util/read_graph_tool_graph.py +++ b/tasks/util/read_graph_tool_graph.py @@ -79,13 +79,25 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals # Check that all seed seeds have been matched and throw error, otherwise. # print(deleted_nodes) - print(seed_ids) - for protein, found in is_matched.items(): - if not found: - raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path)) + # print(seed_ids) + # for protein, found in is_matched.items(): + # if not found: + # raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path)) # Delete edges that should be ignored or are not contained in the selected dataset. deleted_edges = [] + + for edge in g.edges(): + if edge.source == edge.target: + deleted_edges.append(edge) + + g.set_fast_edge_removal(fast=True) + for edge in deleted_edges: + g.remove_edge(edge) + g.set_fast_edge_removal(fast=False) + + deleted_edges = [] + if (drug_ids and not include_indirect_drugs): # If only_direct_drugs should be included, remove any drug-protein edges that the drug is not a direct neighbor of any seeds direct_drugs = set() for edge in g.edges():