From 7fb96851893b097bd61c266bdc67e5c910a29e54 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Wed, 8 Feb 2023 16:16:22 +0100
Subject: [PATCH] restructured data import

---
 .dockerignore                                 |   2 +
 .gitignore                                    |   2 +-
 README.md                                     |   4 +
 docker-compose.yml                            |   4 +-
 docker-django.env.dev                         |   2 +-
 .../management/commands/import_from_nedrex.py |  16 +--
 .../management/includes/DatasetLoader.py      | 132 +++++++++---------
 requirements.txt                              |   4 +-
 8 files changed, 86 insertions(+), 80 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 6b8710a..c7eda2b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,3 @@
 .git
+data
+venv
diff --git a/.gitignore b/.gitignore
index 61228f1..eba7b16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,10 +7,10 @@ venv/
 *.DS_Store
 db.sqlite3
 supervisord.log
-supervisord.log
 supervisord.pid
 docker-entrypoint.lock
 celerybeat-schedule.bak
 celerybeat-schedule.dat
 celerybeat-schedule.dir
 docker-django.env.prodnetworks.zip
+data/Networks/
diff --git a/README.md b/README.md
index 3a8396b..b72c776 100755
--- a/README.md
+++ b/README.md
@@ -26,3 +26,7 @@ python3 manage.py make_graphs
 
 ### Docker DEV environment (building is optional)
 ``docker-compose -f docker-compose.yml up -d --build``
+
+### Data folder
+Static datasets are now mounted from a host directory instead of being baked into the Docker image. Download them from the link below and place them in the `data` folder that is mounted by `docker-compose.yml`:
+https://wolken.zbh.uni-hamburg.de/index.php/s/gywnL3HP26CWrgA
diff --git a/docker-compose.yml b/docker-compose.yml
index d98807f..dbb61b5 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,7 +13,7 @@ services:
     restart: always
     volumes:
       - drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations
-      - drugstone_data_volume:/usr/src/drugstone/data
+      - ./data:/usr/src/drugstone/data
     ports:
       - 8001:8000
     networks:
@@ -60,7 +60,7 @@ services:
     container_name: drugstone_celery
     hostname: drugstone_celery
     volumes:
-      - drugstone_data_volume:/usr/src/drugstone/data
+      - ./data:/usr/src/drugstone/data
     env_file:
       - './docker-django.env.dev'
     depends_on:
diff --git a/docker-django.env.dev b/docker-django.env.dev
index 91d6c04..6f5a076 100644
--- a/docker-django.env.dev
+++ b/docker-django.env.dev
@@ -15,4 +15,4 @@ CELERY_BROKER_URL=redis://redis:6379/0
 FLOWER_PORT=8888
 FLOWER_BASIC_AUTH=drugstone:test
 GT_THREADS=2
-DB_UPDATE_ON_START=0
\ No newline at end of file
+DB_UPDATE_ON_START=1
\ No newline at end of file
diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index bc09491..dd8a434 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -256,8 +256,8 @@ class NedrexImporter:
 
         iter_edge_collection('drug_has_target', add_dpi)
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
-        new_datasets = [dataset].extend(source_datasets.values())
-        DatasetLoader.remove_old_pdi_data(new_datasets, licenced)
+        # new_datasets = [dataset].extend(source_datasets.values())
+        # DatasetLoader.remove_old_pdi_data(new_datasets, licenced)
         return len(bulk)
 
     def import_protein_protein_interactions(self, dataset: PPIDataset, update):
@@ -316,8 +316,8 @@ class NedrexImporter:
 
         iter_ppi(add_ppi)
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
-        new_datasets = [dataset, source_datasets.values()]
-        DatasetLoader.remove_old_ppi_data(new_datasets, licenced)
+        # new_datasets = [dataset, source_datasets.values()]
+        # DatasetLoader.remove_old_ppi_data(new_datasets, licenced)
         return len(bulk)
 
     def import_protein_disorder_associations(self, dataset, update):
@@ -367,8 +367,8 @@ class NedrexImporter:
 
         iter_edge_collection('gene_associated_with_disorder', add_pdis)
         models.ProteinDisorderAssociation.objects.bulk_create(bulk)
-        new_datasets = [dataset, source_datasets.values()]
-        DatasetLoader.remove_old_pdis_data(new_datasets, licenced)
+        # new_datasets = [dataset, source_datasets.values()]
+        # DatasetLoader.remove_old_pdis_data(new_datasets, licenced)
         return len(bulk)
 
     def import_drug_disorder_indications(self, dataset, update):
@@ -416,6 +416,6 @@ class NedrexImporter:
 
         iter_edge_collection('drug_has_indication', add_drdis)
         models.DrugDisorderIndication.objects.bulk_create(bulk)
-        new_datasets = [dataset, source_datasets.values()]
-        DatasetLoader.remove_old_drdi_data(new_datasets, licenced)
+        # new_datasets = [dataset, source_datasets.values()]
+        # DatasetLoader.remove_old_drdi_data(new_datasets, licenced)
         return len(bulk)
diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py
index 15dc16e..062b7d9 100644
--- a/drugstone/management/includes/DatasetLoader.py
+++ b/drugstone/management/includes/DatasetLoader.py
@@ -300,69 +300,69 @@ def is_licenced_drdi_source(source):
     return False
 
 
-def remove_old_pdi_data(new_datasets, licenced):
-    if new_datasets is None:
-        return
-    for dataset in new_datasets:
-        print("Deleting all except "+str(dataset))
-        try:
-            for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced):
-                print("Testing: "+str(d))
-                if d != dataset:
-                    print("Deleting: "+str(d))
-                    d.delete()
-        except Exception as e:
-            print("Error when trying to delete old datasets")
-            print(e)
-            continue
-
-
-def remove_old_ppi_data(new_datasets, licenced):
-    if new_datasets is None:
-        return
-    for dataset in new_datasets:
-        print("Deleting all except " + str(dataset))
-        try:
-            for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced):
-                print("Testing: " + str(d))
-                if d != dataset:
-                    print("Deleting: " + str(d))
-                    d.delete()
-        except Exception as e:
-            print("Error when trying to delete old datasets")
-            print(e)
-            continue
-
-
-def remove_old_pdis_data(new_datasets, licenced):
-    if new_datasets is None:
-        return
-    for dataset in new_datasets:
-        print("Deleting all except " + str(dataset))
-        try:
-            for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced):
-                print("Testing: " + str(d))
-                if d != dataset:
-                    print("Deleting: " + str(d))
-                    d.delete()
-        except Exception as e:
-            print("Error when trying to delete old datasets")
-            print(e)
-            continue
-
-
-def remove_old_drdi_data(new_datasets, licenced):
-    if new_datasets is None:
-        return
-    for dataset in new_datasets:
-        print("Deleting all except " + str(dataset))
-        try:
-            for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced):
-                print("Testing: " + str(d))
-                if d != dataset:
-                    print("Deleting: " + str(d))
-                    d.delete()
-        except Exception as e:
-            print("Error when trying to delete old datasets")
-            print(e)
-            continue
+# def remove_old_pdi_data(new_datasets, licenced):
+#     if new_datasets is None:
+#         return
+#     for dataset in new_datasets:
+#         print("Deleting all except "+str(dataset))
+#         try:
+#             for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced):
+#                 print("Testing: "+str(d))
+#                 if d != dataset:
+#                     print("Deleting: "+str(d))
+#                     d.delete()
+#         except Exception as e:
+#             print("Error when trying to delete old datasets")
+#             print(e)
+#             continue
+
+
+# def remove_old_ppi_data(new_datasets, licenced):
+#     if new_datasets is None:
+#         return
+#     for dataset in new_datasets:
+#         print("Deleting all except " + str(dataset))
+#         try:
+#             for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced):
+#                 print("Testing: " + str(d))
+#                 if d != dataset:
+#                     print("Deleting: " + str(d))
+#                     d.delete()
+#         except Exception as e:
+#             print("Error when trying to delete old datasets")
+#             print(e)
+#             continue
+
+
+# def remove_old_pdis_data(new_datasets, licenced):
+#     if new_datasets is None:
+#         return
+#     for dataset in new_datasets:
+#         print("Deleting all except " + str(dataset))
+#         try:
+#             for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced):
+#                 print("Testing: " + str(d))
+#                 if d != dataset:
+#                     print("Deleting: " + str(d))
+#                     d.delete()
+#         except Exception as e:
+#             print("Error when trying to delete old datasets")
+#             print(e)
+#             continue
+
+
+# def remove_old_drdi_data(new_datasets, licenced):
+#     if new_datasets is None:
+#         return
+#     for dataset in new_datasets:
+#         print("Deleting all except " + str(dataset))
+#         try:
+#             for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced):
+#                 print("Testing: " + str(d))
+#                 if d != dataset:
+#                     print("Deleting: " + str(d))
+#                     d.delete()
+#         except Exception as e:
+#             print("Error when trying to delete old datasets")
+#             print(e)
+#             continue
diff --git a/requirements.txt b/requirements.txt
index e2ca8b1..499918e 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,9 +3,9 @@ celery==5.2.7
 certifi==2022.12.7
 chardet==3.0.4
 click==8.1.3
-cryptography==38.0.3
+cryptography==39.0.1
 decorator==4.4.2
-Django==3.2.16
+Django==3.2.17
 django-cors-headers==3.4.0
 django-redis==4.11.0
 django-rq-dashboard==0.3.3
-- 
GitLab