From 934748b2f6dc08f2ff604f6063e4b23ec1951a1c Mon Sep 17 00:00:00 2001
From: "Hartung, Michael" <michael.hartung@uni-hamburg.de>
Date: Thu, 5 May 2022 13:06:17 +0200
Subject: [PATCH] do not restrict the task graph to its largest connected component

---
 Untitled.ipynb                      |   6 +
 Untitled1.ipynb                     | 244 ++++++++++++++++++++++++++++
 deploy_dev.sh                       |   2 +
 deploy_prod.sh                      |   2 +
 docker-entrypoint.sh                |   1 -
 import-data.sh                      |   2 +-
 netex/views.py                      |   3 +-
 tasks/util/read_graph_tool_graph.py |  10 +-
 8 files changed, 260 insertions(+), 10 deletions(-)
 create mode 100644 Untitled.ipynb
 create mode 100644 Untitled1.ipynb
 create mode 100644 deploy_dev.sh
 create mode 100644 deploy_prod.sh

diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..363fcab
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Untitled1.ipynb b/Untitled1.ipynb
new file mode 100644
index 0000000..3c46427
--- /dev/null
+++ b/Untitled1.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "f02bf93e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from graph_tool.all import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "3f6e4401",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "g = load_graph(\"data-NetExpander/Networks/internal_STRING_DrugBank.gt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "7caa2dad",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<Graph object, undirected, with 24092 vertices and 226778 edges at 0x7fd1e8732ef0>"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "g"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "ec03761f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ids = list(g.vertex_properties[\"netex_id\"])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "f7e03bff",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "24092"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(ids)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "17b2813f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "7601"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len([x for x in ids if x.startswith('dr')])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "4c35d260",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "16491"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len([x for x in ids if x.startswith('p')])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "bc0c6ae6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "'p12627' in list(g.vertex_properties[\"netex_id\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "3900f9bb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/michi/opt/anaconda3/envs/drugstone/lib/python3.6/site-packages/urllib3/connectionpool.py:1050: InsecureRequestWarning: Unverified HTTPS request is being made to host 'drugst.one'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
+      "  InsecureRequestWarning,\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[{'id': 'APOLD1',\n",
+       "  'netexId': 'p30505',\n",
+       "  'uniprotAc': 'Q96LR9',\n",
+       "  'symbol': 'APOLD1',\n",
+       "  'proteinName': 'Apolipoprotein L domain-containing protein 1',\n",
+       "  'entrez': '81575',\n",
+       "  'ensg': ['ENSG00000178878']}]"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "\n",
+    "header= { 'Content-Type': 'application/json' }\n",
+    "API = 'https://drugst.one/drugstone_api/'\n",
+    "response = requests.post(API+'map_nodes/', headers=header, verify=False, data=json.dumps({'nodes': [{'id': \"APOLD1\"}], 'identifier': \"symbol\"}))\n",
+    "\n",
+    "\n",
+    "\n",
+    "response.json()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6de8ed93",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6407897a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "994c656a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d7619c0e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3161b301",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/deploy_dev.sh b/deploy_dev.sh
new file mode 100644
index 0000000..c44af74
--- /dev/null
+++ b/deploy_dev.sh
@@ -0,0 +1,2 @@
+docker build -t gitlab.rrz.uni-hamburg.de:4567/groups/cosy-bio/drugst.one/backend:dev -f ./Dockerfile .
+docker push gitlab.rrz.uni-hamburg.de:4567/groups/cosy-bio/drugst.one/backend:dev
\ No newline at end of file
diff --git a/deploy_prod.sh b/deploy_prod.sh
new file mode 100644
index 0000000..45e4fab
--- /dev/null
+++ b/deploy_prod.sh
@@ -0,0 +1,2 @@
+docker build -t gitlab.rrz.uni-hamburg.de:4567/cosy-bio/drugst.one/backend:prod -f ./Dockerfile .
+docker push gitlab.rrz.uni-hamburg.de:4567/cosy-bio/drugst.one/backend:prod
\ No newline at end of file
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 340f566..2b49875 100755
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -4,6 +4,5 @@ python3 manage.py migrate --run-syncdb
 python3 manage.py createfixtures
 python3 manage.py cleanuptasks
 # sh import-data.sh
-# python3 manage.py make_graphs
 
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
diff --git a/import-data.sh b/import-data.sh
index 851ec4c..f3f7b9f 100755
--- a/import-data.sh
+++ b/import-data.sh
@@ -4,4 +4,4 @@ python3 manage.py populate_db -p .
 python3 manage.py populate_db --data_dir . --exp_file gene_tissue_expression.gct
 python3 manage.py populate_db --data_dir . --drug_file drug-file.txt
 python3 manage.py populate_db -pp .
-python3 manage.py populate_db -pd .
\ No newline at end of file
+python3 manage.py populate_db -pdr .
\ No newline at end of file
diff --git a/netex/views.py b/netex/views.py
index adc2e8c..8714d68 100755
--- a/netex/views.py
+++ b/netex/views.py
@@ -166,7 +166,7 @@ class TaskView(APIView):
 
 
 @api_view(['POST'])
-def fetch_edges(request) -> Response:
+def fetch_edges(request) -> Response:   
     """Retrieves interactions between nodes given as a list of netex IDs.
 
     Args:
@@ -178,7 +178,6 @@ def fetch_edges(request) -> Response:
     """
     dataset = request.data.get('dataset', 'STRING')
     netex_ids = [node['netex_id'][1:] for node in request.data.get('nodes', '[]') if 'netex_id' in node]
-
     dataset_object = models.PPIDataset.objects.get(name__iexact=dataset)
     interaction_objects = models.ProteinProteinInteraction.objects.filter(
         Q(ppi_dataset=dataset_object) & Q(from_protein__in=netex_ids) & Q(to_protein__in=netex_ids))
diff --git a/tasks/util/read_graph_tool_graph.py b/tasks/util/read_graph_tool_graph.py
index d5759c5..38010d6 100755
--- a/tasks/util/read_graph_tool_graph.py
+++ b/tasks/util/read_graph_tool_graph.py
@@ -37,9 +37,9 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
       The graph indices for all drug nodes
     """
     # Read the graph.
-  
-    gg = gt.load_graph(file_path)
-    g = gtt.extract_largest_component(gg, directed=False, prune=True)   # this line is added since we need to work with the LCC of the graphs for all algorithms
+
+    g = gt.load_graph(file_path)
+    # g = gtt.extract_largest_component(g, directed=False, prune=True)   # disabled: the graph is no longer reduced to its largest connected component (LCC)
   
     # drug_protein = "DrugHasTarget"
     d_type = "drug"
@@ -62,10 +62,8 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
     is_matched = {protein: False for protein in seeds}
     for node in range(g.num_vertices()):
         node_type = g.vertex_properties["type"][node]
-        # if g.vertex_properties["name"][node] in seeds:
         if g.vertex_properties[node_name_attribute][node] in seeds:
             seed_ids.append(node)
-            # is_matched[g.vertex_properties["name"][node]] = True
             is_matched[g.vertex_properties[node_name_attribute][node]] = True
         if node_type == d_type:
             if include_non_approved_drugs:
@@ -78,7 +76,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
     # Check that all seed seeds have been matched and throw error, otherwise.
     for protein, found in is_matched.items():
         if not found:
-            raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
+            raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
     
     # Delete edges that should be ignored or are not contained in the selected dataset.
     deleted_edges = []
-- 
GitLab