From 7fb96851893b097bd61c266bdc67e5c910a29e54 Mon Sep 17 00:00:00 2001 From: AndiMajore <andi.majore@googlemail.com> Date: Wed, 8 Feb 2023 16:16:22 +0100 Subject: [PATCH] restructured data import --- .dockerignore | 2 + .gitignore | 2 +- README.md | 4 + docker-compose.yml | 4 +- docker-django.env.dev | 2 +- .../management/commands/import_from_nedrex.py | 16 +-- .../management/includes/DatasetLoader.py | 132 +++++++++--------- requirements.txt | 4 +- 8 files changed, 86 insertions(+), 80 deletions(-) diff --git a/.dockerignore b/.dockerignore index 6b8710a..c7eda2b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,3 @@ .git +data +venv diff --git a/.gitignore b/.gitignore index 61228f1..eba7b16 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,10 @@ venv/ *.DS_Store db.sqlite3 supervisord.log -supervisord.log supervisord.pid docker-entrypoint.lock celerybeat-schedule.bak celerybeat-schedule.dat celerybeat-schedule.dir docker-django.env.prodnetworks.zip +data/Networks/ diff --git a/README.md b/README.md index 3a8396b..b72c776 100755 --- a/README.md +++ b/README.md @@ -26,3 +26,7 @@ python3 manage.py make_graphs ### Docker DEV environment (building is optional) ``docker-compose -f docker-compose.yml up -d --build`` + +### Data folder +Static datasets are mounted from a directory now, instead of fusing them into the image. Download them from the following link and put them into the data folder that is mounted by the docker-compose.yml: +https://wolken.zbh.uni-hamburg.de/index.php/s/gywnL3HP26CWrgA diff --git a/docker-compose.yml b/docker-compose.yml index d98807f..dbb61b5 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: restart: always volumes: - drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations - - drugstone_data_volume:/usr/src/drugstone/data + - ./data:/usr/src/drugstone/data ports: - 8001:8000 networks: @@ -60,7 +60,7 @@ services: container_name: drugstone_celery hostname: drugstone_celery volumes: - - drugstone_data_volume:/usr/src/drugstone/data + - ./data:/usr/src/drugstone/data env_file: - './docker-django.env.dev' depends_on: diff --git a/docker-django.env.dev b/docker-django.env.dev index 91d6c04..6f5a076 100644 --- a/docker-django.env.dev +++ b/docker-django.env.dev @@ -15,4 +15,4 @@ CELERY_BROKER_URL=redis://redis:6379/0 FLOWER_PORT=8888 FLOWER_BASIC_AUTH=drugstone:test GT_THREADS=2 -DB_UPDATE_ON_START=0 \ No newline at end of file +DB_UPDATE_ON_START=1 \ No newline at end of file diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py index bc09491..dd8a434 100644 --- a/drugstone/management/commands/import_from_nedrex.py +++ b/drugstone/management/commands/import_from_nedrex.py @@ -256,8 +256,8 @@ class NedrexImporter: iter_edge_collection('drug_has_target', add_dpi) models.ProteinDrugInteraction.objects.bulk_create(bulk) - new_datasets = [dataset].extend(source_datasets.values()) - DatasetLoader.remove_old_pdi_data(new_datasets, licenced) + # new_datasets = [dataset].extend(source_datasets.values()) + # DatasetLoader.remove_old_pdi_data(new_datasets, licenced) return len(bulk) def import_protein_protein_interactions(self, dataset: PPIDataset, update): @@ -316,8 +316,8 @@ class NedrexImporter: iter_ppi(add_ppi) models.ProteinProteinInteraction.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] - DatasetLoader.remove_old_ppi_data(new_datasets, licenced) + # new_datasets = [dataset, source_datasets.values()] + # DatasetLoader.remove_old_ppi_data(new_datasets, licenced) return len(bulk) def import_protein_disorder_associations(self, dataset, update): @@ -367,8 +367,8 @@ class NedrexImporter: iter_edge_collection('gene_associated_with_disorder', add_pdis) models.ProteinDisorderAssociation.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] - DatasetLoader.remove_old_pdis_data(new_datasets, licenced) + # new_datasets = [dataset, source_datasets.values()] + # DatasetLoader.remove_old_pdis_data(new_datasets, licenced) return len(bulk) def import_drug_disorder_indications(self, dataset, update): @@ -416,6 +416,6 @@ class NedrexImporter: iter_edge_collection('drug_has_indication', add_drdis) models.DrugDisorderIndication.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] - DatasetLoader.remove_old_drdi_data(new_datasets, licenced) + # new_datasets = [dataset, source_datasets.values()] + # DatasetLoader.remove_old_drdi_data(new_datasets, licenced) return len(bulk) diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py index 15dc16e..062b7d9 100644 --- a/drugstone/management/includes/DatasetLoader.py +++ b/drugstone/management/includes/DatasetLoader.py @@ -300,69 +300,69 @@ def is_licenced_drdi_source(source): return False -def remove_old_pdi_data(new_datasets, licenced): - if new_datasets is None: - return - for dataset in new_datasets: - print("Deleting all except "+str(dataset)) - try: - for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: "+str(d)) - if d != dataset: - print("Deleting: "+str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue - - -def remove_old_ppi_data(new_datasets, licenced): - if new_datasets is None: - return - for dataset in new_datasets: - print("Deleting all except " + str(dataset)) - try: - for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: " + str(d)) - if d != dataset: - print("Deleting: " + str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue - - -def remove_old_pdis_data(new_datasets, licenced): - if new_datasets is None: - return - for dataset in new_datasets: - print("Deleting all except " + str(dataset)) - try: - for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: " + str(d)) - if d != dataset: - print("Deleting: " + str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue - - -def remove_old_drdi_data(new_datasets, licenced): - if new_datasets is None: - return - for dataset in new_datasets: - print("Deleting all except " + str(dataset)) - try: - for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: " + str(d)) - if d != dataset: - print("Deleting: " + str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue +# def remove_old_pdi_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except "+str(dataset)) +# try: +# for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: "+str(d)) +# if d != dataset: +# print("Deleting: "+str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue + + +# def remove_old_ppi_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except " + str(dataset)) +# try: +# for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: " + str(d)) +# if d != dataset: +# print("Deleting: " + str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue + + +# def remove_old_pdis_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except " + str(dataset)) +# try: +# for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: " + str(d)) +# if d != dataset: +# print("Deleting: " + str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue + + +# def remove_old_drdi_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except " + str(dataset)) +# try: +# for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: " + str(d)) +# if d != dataset: +# print("Deleting: " + str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue diff --git a/requirements.txt b/requirements.txt index e2ca8b1..499918e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,9 @@ celery==5.2.7 certifi==2022.12.7 chardet==3.0.4 click==8.1.3 -cryptography==38.0.3 +cryptography==39.0.1 decorator==4.4.2 -Django==3.2.16 +Django==3.2.17 django-cors-headers==3.4.0 django-redis==4.11.0 django-rq-dashboard==0.3.3 -- GitLab