From db3da4acafc6a775b51decced904b5ab0edc4724 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Thu, 30 Jun 2022 17:49:50 +0200
Subject: [PATCH] NeDRex integration and cleanup progressed

Former-commit-id: e064c2ed7ee312a0e6efb5ce07125e7cd2c38fa7 [formerly ee0efdbd94033f23904dae682523e5f0adf5fac7]
Former-commit-id: 2f82b1ea117749164c5a9241805dfe160cf1effb
---
 Dockerfile                                    |   2 +-
 .../Disorders/disorders.tsv                   |   0
 .../drugbank-drug_disorder_indication.tsv     |   0
 {data_drugstone => data}/Drugs/drug-file.txt  |   0
 .../Drugs/drugbank_labels.csv                 |   0
 .../Expression/gene_tissue_expression.gct     |   0
 .../PDI/BIOGRID-CHEMICALS-3.5.187.chemtab.txt |   0
 .../PDI/DGIdb_drug_gene_interactions.csv      |   0
 .../PDI/chembl_drug_gene_interactions.csv     |   0
 .../chembl_drug_gene_interactions_uniq.csv    |   0
 .../PDI/drugbank_drug_gene_interactions.csv   |   0
 .../drugbank_drug_gene_interactions_uniq.csv  |   0
 .../disgenet-protein_disorder_association.tsv |   0
 .../PPDr-temp.graphml.REMOVED.git-id          |   0
 .../PPI-temp.graphml.REMOVED.git-id           |   0
 ...o_sapiens-3.5.187.mitab.txt.REMOVED.git-id |   0
 {data_drugstone => data}/PPI/apid_9606_Q2.txt |   0
 ...tome_homo-sapiens-protein-interactions.txt |   0
 .../PPI/string_interactions.csv               |   0
 .../Proteins/entrez_to_ensg.json              |   0
 .../Proteins/protein_list.csv                 |   0
 .../drug-protein-interaction.txt              |   0
 {data_drugstone => data}/protein-file.txt     |   0
 .../protein_protein_interaction_file.txt      |   0
 .../temp-PPDr.graphml.REMOVED.git-id          |   0
 docker-compose.yml                            |  23 +-
 drugstone/backend_tasks.py                    |   2 +-
 .../management/commands/import_from_nedrex.py | 201 +++++++---
 drugstone/management/commands/make_graphs.py  |   4 +-
 drugstone/management/commands/populate_db.py  | 176 +++++----
 drugstone/management/commands/test.py         |  20 +-
 drugstone/management/includes/DataLoader.py   |  20 +-
 .../management/includes/DataPopulator.py      | 356 +++++++-----------
 .../management/includes/DatasetLoader.py      |  99 +++++
 drugstone/management/includes/NodeCache.py    |  60 +++
 drugstone/models.py                           |  82 ++--
 .../docker-entrypoint.sh                      |  10 +-
 scripts/import-data.sh                        |  16 +-
 tasks/betweenness_centrality_test.py          |   2 +-
 tasks/closeness_centrality_test.py            |   2 +-
 tasks/degree_centrality_test.py               |   2 +-
 tasks/keypathwayminer_test.py                 |   2 +-
 tasks/multi_steiner_test.py                   |   2 +-
 tasks/network_proximity_test.py               |   2 +-
 tasks/task_hook.py                            |   2 +-
 tasks/task_test.py                            |   2 +-
 tasks/trust_rank_test.py                      |   2 +-
 47 files changed, 681 insertions(+), 408 deletions(-)
 rename {data_drugstone => data}/Disorders/disorders.tsv (100%)
 rename {data_drugstone => data}/DrDi/drugbank-drug_disorder_indication.tsv (100%)
 rename {data_drugstone => data}/Drugs/drug-file.txt (100%)
 rename {data_drugstone => data}/Drugs/drugbank_labels.csv (100%)
 rename {data_drugstone => data}/Expression/gene_tissue_expression.gct (100%)
 rename {data_drugstone => data}/PDI/BIOGRID-CHEMICALS-3.5.187.chemtab.txt (100%)
 rename {data_drugstone => data}/PDI/DGIdb_drug_gene_interactions.csv (100%)
 rename {data_drugstone => data}/PDI/chembl_drug_gene_interactions.csv (100%)
 rename {data_drugstone => data}/PDI/chembl_drug_gene_interactions_uniq.csv (100%)
 rename {data_drugstone => data}/PDI/drugbank_drug_gene_interactions.csv (100%)
 rename {data_drugstone => data}/PDI/drugbank_drug_gene_interactions_uniq.csv (100%)
 rename {data_drugstone => data}/PDi/disgenet-protein_disorder_association.tsv (100%)
 rename {data_drugstone => data}/PPDr-temp.graphml.REMOVED.git-id (100%)
 rename {data_drugstone => data}/PPI-temp.graphml.REMOVED.git-id (100%)
 rename {data_drugstone => data}/PPI/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt.REMOVED.git-id (100%)
 rename {data_drugstone => data}/PPI/apid_9606_Q2.txt (100%)
 rename {data_drugstone => data}/PPI/reactome_homo-sapiens-protein-interactions.txt (100%)
 rename {data_drugstone => data}/PPI/string_interactions.csv (100%)
 rename {data_drugstone => data}/Proteins/entrez_to_ensg.json (100%)
 rename {data_drugstone => data}/Proteins/protein_list.csv (100%)
 rename {data_drugstone => data}/drug-protein-interaction.txt (100%)
 rename {data_drugstone => data}/protein-file.txt (100%)
 rename {data_drugstone => data}/protein_protein_interaction_file.txt (100%)
 rename {data_drugstone => data}/temp-PPDr.graphml.REMOVED.git-id (100%)
 create mode 100644 drugstone/management/includes/DatasetLoader.py
 create mode 100644 drugstone/management/includes/NodeCache.py
 rename docker-entrypoint.sh => scripts/docker-entrypoint.sh (63%)
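
For orientation, a minimal sketch of how the pieces added below (NodeCache, NedrexImporter, DatasetLoader, DataPopulator) are wired together, following the updated populate_db command in this patch; it assumes a configured Django environment and a reachable NeDRex instance and is illustrative only, not part of the diff:

# Sketch only -- mirrors drugstone/management/commands/populate_db.py from this
# patch; assumes Django settings are loaded (e.g. inside a management command)
# and that the hard-coded NeDRex instance below is reachable.
from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
from drugstone.management.includes.DataPopulator import DataPopulator
from drugstone.management.commands.import_from_nedrex import NedrexImporter

nedrex_api_url = "http://82.148.225.92:8123/"

cache = NodeCache()                               # shared protein/drug/disorder lookups
importer = NedrexImporter(nedrex_api_url, cache)  # pulls nodes and edges from NeDRexDB
populator = DataPopulator(cache)                  # loads the remaining flat-file datasets
update = False                                    # True switches imports into update mode

importer.import_drugs(update)
importer.import_disorders(update)
importer.import_proteins(update)
populator.populate_ensg(update)

# Edge imports take a dataset object resolved by DatasetLoader, so a re-run with
# update=True deletes and refills only that dataset's rows.
importer.import_protein_protein_interactions(
    DatasetLoader.get_ppi_nedrex(nedrex_api_url), update)
importer.import_drug_target_interactions(
    DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)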

diff --git a/Dockerfile b/Dockerfile
index aec4578..5bafbf3 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,7 +22,7 @@ RUN pip install -r /usr/src/drugstone/requirements.txt
 RUN pip install gunicorn
 
 COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
-COPY ./docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh
+# COPY scripts/docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh
 # COPY ./scripts/ /usr/src/drugstone/scripts/
 COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
 RUN pip install /usr/src/drugstone/python_nedrex/
diff --git a/data_drugstone/Disorders/disorders.tsv b/data/Disorders/disorders.tsv
similarity index 100%
rename from data_drugstone/Disorders/disorders.tsv
rename to data/Disorders/disorders.tsv
diff --git a/data_drugstone/DrDi/drugbank-drug_disorder_indication.tsv b/data/DrDi/drugbank-drug_disorder_indication.tsv
similarity index 100%
rename from data_drugstone/DrDi/drugbank-drug_disorder_indication.tsv
rename to data/DrDi/drugbank-drug_disorder_indication.tsv
diff --git a/data_drugstone/Drugs/drug-file.txt b/data/Drugs/drug-file.txt
similarity index 100%
rename from data_drugstone/Drugs/drug-file.txt
rename to data/Drugs/drug-file.txt
diff --git a/data_drugstone/Drugs/drugbank_labels.csv b/data/Drugs/drugbank_labels.csv
similarity index 100%
rename from data_drugstone/Drugs/drugbank_labels.csv
rename to data/Drugs/drugbank_labels.csv
diff --git a/data_drugstone/Expression/gene_tissue_expression.gct b/data/Expression/gene_tissue_expression.gct
similarity index 100%
rename from data_drugstone/Expression/gene_tissue_expression.gct
rename to data/Expression/gene_tissue_expression.gct
diff --git a/data_drugstone/PDI/BIOGRID-CHEMICALS-3.5.187.chemtab.txt b/data/PDI/BIOGRID-CHEMICALS-3.5.187.chemtab.txt
similarity index 100%
rename from data_drugstone/PDI/BIOGRID-CHEMICALS-3.5.187.chemtab.txt
rename to data/PDI/BIOGRID-CHEMICALS-3.5.187.chemtab.txt
diff --git a/data_drugstone/PDI/DGIdb_drug_gene_interactions.csv b/data/PDI/DGIdb_drug_gene_interactions.csv
similarity index 100%
rename from data_drugstone/PDI/DGIdb_drug_gene_interactions.csv
rename to data/PDI/DGIdb_drug_gene_interactions.csv
diff --git a/data_drugstone/PDI/chembl_drug_gene_interactions.csv b/data/PDI/chembl_drug_gene_interactions.csv
similarity index 100%
rename from data_drugstone/PDI/chembl_drug_gene_interactions.csv
rename to data/PDI/chembl_drug_gene_interactions.csv
diff --git a/data_drugstone/PDI/chembl_drug_gene_interactions_uniq.csv b/data/PDI/chembl_drug_gene_interactions_uniq.csv
similarity index 100%
rename from data_drugstone/PDI/chembl_drug_gene_interactions_uniq.csv
rename to data/PDI/chembl_drug_gene_interactions_uniq.csv
diff --git a/data_drugstone/PDI/drugbank_drug_gene_interactions.csv b/data/PDI/drugbank_drug_gene_interactions.csv
similarity index 100%
rename from data_drugstone/PDI/drugbank_drug_gene_interactions.csv
rename to data/PDI/drugbank_drug_gene_interactions.csv
diff --git a/data_drugstone/PDI/drugbank_drug_gene_interactions_uniq.csv b/data/PDI/drugbank_drug_gene_interactions_uniq.csv
similarity index 100%
rename from data_drugstone/PDI/drugbank_drug_gene_interactions_uniq.csv
rename to data/PDI/drugbank_drug_gene_interactions_uniq.csv
diff --git a/data_drugstone/PDi/disgenet-protein_disorder_association.tsv b/data/PDi/disgenet-protein_disorder_association.tsv
similarity index 100%
rename from data_drugstone/PDi/disgenet-protein_disorder_association.tsv
rename to data/PDi/disgenet-protein_disorder_association.tsv
diff --git a/data_drugstone/PPDr-temp.graphml.REMOVED.git-id b/data/PPDr-temp.graphml.REMOVED.git-id
similarity index 100%
rename from data_drugstone/PPDr-temp.graphml.REMOVED.git-id
rename to data/PPDr-temp.graphml.REMOVED.git-id
diff --git a/data_drugstone/PPI-temp.graphml.REMOVED.git-id b/data/PPI-temp.graphml.REMOVED.git-id
similarity index 100%
rename from data_drugstone/PPI-temp.graphml.REMOVED.git-id
rename to data/PPI-temp.graphml.REMOVED.git-id
diff --git a/data_drugstone/PPI/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt.REMOVED.git-id b/data/PPI/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt.REMOVED.git-id
similarity index 100%
rename from data_drugstone/PPI/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt.REMOVED.git-id
rename to data/PPI/BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt.REMOVED.git-id
diff --git a/data_drugstone/PPI/apid_9606_Q2.txt b/data/PPI/apid_9606_Q2.txt
similarity index 100%
rename from data_drugstone/PPI/apid_9606_Q2.txt
rename to data/PPI/apid_9606_Q2.txt
diff --git a/data_drugstone/PPI/reactome_homo-sapiens-protein-interactions.txt b/data/PPI/reactome_homo-sapiens-protein-interactions.txt
similarity index 100%
rename from data_drugstone/PPI/reactome_homo-sapiens-protein-interactions.txt
rename to data/PPI/reactome_homo-sapiens-protein-interactions.txt
diff --git a/data_drugstone/PPI/string_interactions.csv b/data/PPI/string_interactions.csv
similarity index 100%
rename from data_drugstone/PPI/string_interactions.csv
rename to data/PPI/string_interactions.csv
diff --git a/data_drugstone/Proteins/entrez_to_ensg.json b/data/Proteins/entrez_to_ensg.json
similarity index 100%
rename from data_drugstone/Proteins/entrez_to_ensg.json
rename to data/Proteins/entrez_to_ensg.json
diff --git a/data_drugstone/Proteins/protein_list.csv b/data/Proteins/protein_list.csv
similarity index 100%
rename from data_drugstone/Proteins/protein_list.csv
rename to data/Proteins/protein_list.csv
diff --git a/data_drugstone/drug-protein-interaction.txt b/data/drug-protein-interaction.txt
similarity index 100%
rename from data_drugstone/drug-protein-interaction.txt
rename to data/drug-protein-interaction.txt
diff --git a/data_drugstone/protein-file.txt b/data/protein-file.txt
similarity index 100%
rename from data_drugstone/protein-file.txt
rename to data/protein-file.txt
diff --git a/data_drugstone/protein_protein_interaction_file.txt b/data/protein_protein_interaction_file.txt
similarity index 100%
rename from data_drugstone/protein_protein_interaction_file.txt
rename to data/protein_protein_interaction_file.txt
diff --git a/data_drugstone/temp-PPDr.graphml.REMOVED.git-id b/data/temp-PPDr.graphml.REMOVED.git-id
similarity index 100%
rename from data_drugstone/temp-PPDr.graphml.REMOVED.git-id
rename to data/temp-PPDr.graphml.REMOVED.git-id
diff --git a/docker-compose.yml b/docker-compose.yml
index 310e7e9..0ea123a 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,13 +6,14 @@ services:
     container_name: drugstone_backend
     command: 
       - "sh"
-      - "/usr/src/drugstone/docker-entrypoint.sh"
+      - "scripts/docker-entrypoint.sh"
     build: .
     env_file:
       - 'docker-django.env.dev'
     restart: always
     volumes:
-      - drugstone_backend_volume:/usr/src/drugstone/drugstone/migrations
+      - drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations
+      - drugstone_data_volume:/usr/src/drugstone/data
     ports:
       - 8001:8000
     networks:
@@ -30,7 +31,7 @@ services:
     networks:
       - drugstone_net
     volumes:
-      - drugstone_postgres_volume:/var/lib/postgresql/data_drugstone/
+      - drugstone_db_volume:/var/lib/postgresql/data
     environment:
       - POSTGRES_DB=drugstone
       - POSTGRES_USER=drugstone
@@ -53,15 +54,13 @@ services:
   celery:
     command: 
       - "sh"
-      - "/usr/src/drugstone/scripts/start_celery_worker.sh"
+      - "scripts/start_celery_worker.sh"
     restart: always
     image: drugstone_backend
     container_name: drugstone_celery
     hostname: drugstone_celery
     env_file:
       - './docker-django.env.dev'
-#    volumes:
-#      - ./:/usr/src/drugstone/
     depends_on:
       - redis
       - db
@@ -70,14 +69,12 @@ services:
   celery-beat:
     command: 
       - "sh"
-      - "/usr/src/drugstone/scripts/start_celery_beat.sh"
+      - "scripts/start_celery_beat.sh"
     image: drugstone_backend
     container_name: drugstone_celery_beat
     hostname: drugstone_celery_beat
     env_file:
       - './docker-django.env.dev'
-#    volumes:
-#      - ./:/usr/src/drugstone/
     depends_on:
       - redis
       - db
@@ -97,5 +94,9 @@ networks:
   drugstone_net:
 
 volumes:
-  drugstone_postgres_volume:
-  drugstone_backend_volume:
\ No newline at end of file
+  drugstone_db_volume:
+    external: true
+  drugstone_db_schema_volume:
+    external: true
+  drugstone_data_volume:
+    external: true
\ No newline at end of file
diff --git a/drugstone/backend_tasks.py b/drugstone/backend_tasks.py
index e8a6134..01f7827 100755
--- a/drugstone/backend_tasks.py
+++ b/drugstone/backend_tasks.py
@@ -40,7 +40,7 @@ def run_task(token, algorithm, parameters):
     r.set(f'{token}_job_id', f'{job_id}')
     r.set(f'{token}_started_at', str(datetime.now().timestamp()))
 
-    task_hook = TaskHook(json.loads(parameters), './data_drugstone/Networks/', set_progress, set_result)
+    task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result)
 
     try:
         if algorithm == 'dummy':
diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index dc3f052..b3af226 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -4,6 +4,7 @@ import python_nedrex as nedrex
 from python_nedrex.core import get_nodes, get_edges, get_api_key
 
 from drugstone import models
+from drugstone.management.includes.NodeCache import NodeCache
 
 
 def iter_node_collection(coll_name, eval):
@@ -35,7 +36,7 @@ def identify_updates(new_list, old_list):
     c = list()
     for id in new_list:
         if id not in old_list:
-            c.append(id)
+            c.append(new_list[id])
         elif new_list[id] != old_list[id]:
             old_list[id].update(new_list[id])
             u.append(old_list[id])
@@ -45,51 +46,33 @@ def identify_updates(new_list, old_list):
 def format_list(l):
     if l is not None and len(l) > 0:
         s = str(l)[1:]
-        return s[:len(s) - 1].replace("'","")
+        return s[:len(s) - 1].replace("'", "")
     return ""
 
 
-class nedrex_importer:
-    proteins = dict()
-    entrez_to_uniprot = dict()
-    gene_name_to_uniprot = defaultdict(lambda: set())
-    disorders = dict()
-    drugs = dict()
+def to_id(string):
+    idx = string.index('.')
+    return string[idx + 1:]
 
-    def __init__(self, base_url):
+
+class NedrexImporter:
+    cache: NodeCache = None
+
+    def __init__(self, base_url, cache: NodeCache):
+        self.cache = cache
         nedrex.config.set_url_base(base_url)
         api_key = get_api_key(accept_eula=True)
         nedrex.config.set_api_key(api_key)
 
-    def init_proteins(self):
-        if len(self.proteins) == 0:
-            print("Generating protein maps...")
-            for protein in models.Protein.objects.all():
-                self.proteins[protein.entrez] = protein
-                self.entrez_to_uniprot[protein.entrez] = protein.uniprot_code
-                self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
-
-    def init_drugs(self):
-        if len(self.drugs) == 0:
-            print("Generating drug map...")
-            for drug in models.Drug.objects.all():
-                self.drugs[drug.drug_id] = drug
-
-    def init_disorders(self):
-        if len(self.disorders) == 0:
-            print("Generating disorder map...")
-            for disorder in models.Disorder.objects.all():
-                self.disorders[disorder.mondo_id] = disorder
-
     def import_proteins(self, update: bool):
         proteins = dict()
         gene_to_prots = defaultdict(lambda: set())
 
         if update:
-            self.init_proteins()
+            self.cache.init_proteins()
 
         def add_protein(node):
-            id = node['primaryDomainId'].split('.')[1]
+            id = to_id(node['primaryDomainId'])
             name = node['geneName']
             if len(node['synonyms']) > 0:
                 name = node['synonyms'][0]
@@ -100,13 +83,13 @@ class nedrex_importer:
             proteins[id] = models.Protein(uniprot_code=id, protein_name=name, gene=node['geneName'])
 
         def add_edges(edge):
-            id = edge['sourceDomainId'].split('.')[1]
+            id = to_id(edge['sourceDomainId'])
             protein = proteins[id]
-            protein.entrez = edge['targetDomainId'].split('.')[1]
+            protein.entrez = to_id(edge['targetDomainId'])
             gene_to_prots[protein.entrez].add(id)
 
         def add_genes(node):
-            id = node['primaryDomainId'].split('.')[1]
+            id = to_id(node['primaryDomainId'])
             for prot_id in gene_to_prots[id]:
                 protein = proteins[prot_id]
                 try:
@@ -116,65 +99,177 @@ class nedrex_importer:
 
         iter_node_collection('protein', add_protein)
         iter_edge_collection('protein_encoded_by_gene', add_edges)
+
+        with_entrez = dict()
+        for ids in gene_to_prots.values():
+            for id in ids:
+                with_entrez[id] = proteins[id]
+        proteins = with_entrez
+
         iter_node_collection('gene', add_genes)
         # TODO test updating ideas
+
         if update:
-            (updates, creates) = identify_updates(proteins, self.proteins)
-            models.Protein.objects.bulk_update(updates)
+            (updates, creates) = identify_updates(proteins, self.cache.proteins)
+            for u in updates:
+                u.save()
             models.Protein.objects.bulk_create(creates)
             for protein in creates:
-                self.proteins[protein.uniprot_code] = protein
+                self.cache.proteins[protein.uniprot_code] = protein
         else:
             models.Protein.objects.bulk_create(proteins.values())
-            self.proteins = proteins
-        return len(self.proteins)
+            self.cache.proteins = proteins
+        return len(self.cache.proteins)
 
     def import_drugs(self, update):
         drugs = dict()
         if update:
-            self.init_drugs()
+            self.cache.init_drugs()
 
         def add_drug(node):
-            id = node['primaryDomainId'].split('.')[1]
+            id = to_id(node['primaryDomainId'])
             drugs[id] = models.Drug(drug_id=id, name=node['displayName'], status=format_list(node['drugGroups']))
 
         iter_node_collection('drug', add_drug)
 
         # TODO test updating ideas
         if update:
-            (updates, creates) = identify_updates(drugs, self.drugs)
-            models.Drug.objects.bulk_update(updates)
+            (updates, creates) = identify_updates(drugs, self.cache.drugs)
+            for u in updates:
+                u.save()
             models.Drug.objects.bulk_create(creates)
             for drug in creates:
-                self.drugs[drug.drug_id] = drug
+                self.cache.drugs[drug.drug_id] = drug
         else:
             models.Drug.objects.bulk_create(drugs.values())
-            self.drugs = drugs
+            self.cache.drugs = drugs
 
-        return len(self.drugs)
+        return len(self.cache.drugs)
 
     def import_disorders(self, update):
         disorders = dict()
         if update:
-            self.init_disorders()
+            self.cache.init_disorders()
 
         def add_disorder(node):
-            id = node['primaryDomainId'].split('.')[1]
+            id = to_id(node['primaryDomainId'])
             disorders[id] = models.Disorder(mondo_id=id, label=node['displayName'], icd10=format_list(node['icd10']))
 
         iter_node_collection('disorder', add_disorder)
 
         # TODO test updating ideas
         if update:
-            (updates, creates) = identify_updates(disorders, self.disorders)
-            models.Disorder.objects.bulk_update(updates)
+            (updates, creates) = identify_updates(disorders, self.cache.disorders)
+            for u in updates:
+                u.save()
             models.Disorder.objects.bulk_create(creates)
             for disorder in creates:
-                self.disorders[disorder.uniprot_code] = disorder
+                self.cache.disorders[disorder.mondo_id] = disorder
         else:
             models.Disorder.objects.bulk_create(disorders.values())
-            self.disorders = disorders
+            self.cache.disorders = disorders
+
+        return len(self.cache.disorders)
+
+    def import_drug_target_interactions(self, dataset, update):
+        self.cache.init_drugs()
+        self.cache.init_proteins()
+
+        if update:
+            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
+
+        bulk = set()
 
-        return len(self.disorders)
+        def add_dpi(edge):
+            try:
+                bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset,
+                                                       drug=self.cache.get_drug_by_drugbank(
+                                                           to_id(edge['sourceDomainId'])),
+                                                       protein=self.cache.get_protein_by_uniprot(
+                                                           to_id(edge['targetDomainId']))))
+            except KeyError:
+                pass
 
+        iter_edge_collection('drug_has_target', add_dpi)
+        models.ProteinDrugInteraction.objects.bulk_create(bulk)
+        return len(bulk)
 
+    def import_protein_protein_interactions(self, dataset, update):
+        self.cache.init_proteins()
+
+        if update:
+            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
+
+        bulk = list()
+
+        def iter_ppi(eval):
+            from python_nedrex import ppi
+            offset = 0
+            limit = 10000
+            while True:
+                result = ppi.ppis({"exp"}, skip=offset, limit=limit)
+                if not result:
+                    return
+                for edge in result:
+                    eval(edge)
+                offset += limit
+
+        def add_ppi(edge):
+            try:
+                bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset,
+                                                             from_protein=self.cache.get_protein_by_uniprot(
+                                                                 to_id(edge['memberOne'])),
+                                                             to_protein=self.cache.get_protein_by_uniprot(
+                                                                 to_id(edge['memberTwo']))))
+            except KeyError:
+                pass
+
+        iter_ppi(add_ppi)
+        models.ProteinProteinInteraction.objects.bulk_create(bulk)
+        return len(bulk)
+
+    def import_protein_disorder_associations(self, dataset, update):
+        self.cache.init_disorders()
+        self.cache.init_proteins()
+
+        if update:
+            models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
+
+        bulk = set()
+
+        def add_pdis(edge):
+            try:
+                disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
+                for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
+                    bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset,
+                                                               protein=protein,
+                                                               disorder=disorder, score=edge['score']))
+            except KeyError:
+                pass
+
+        iter_edge_collection('gene_associated_with_disorder', add_pdis)
+        models.ProteinDisorderAssociation.objects.bulk_create(bulk)
+        return len(bulk)
+
+    def import_drug_disorder_indications(self, dataset, update):
+        self.cache.init_disorders()
+        self.cache.init_drugs()
+
+        if update:
+            models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
+
+        bulk = set()
+
+        def add_drdis(edge):
+            try:
+                bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset,
+                                                       drug=self.cache.get_drug_by_drugbank(
+                                                           to_id(edge['sourceDomainId'])),
+                                                       disorder=self.cache.get_disorder_by_mondo(
+                                                           to_id(edge['targetDomainId']))))
+            except KeyError:
+                pass
+
+        iter_edge_collection('drug_has_indication', add_drdis)
+        models.DrugDisorderIndication.objects.bulk_create(bulk)
+        return len(bulk)
diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py
index 168c837..a6c3d81 100755
--- a/drugstone/management/commands/make_graphs.py
+++ b/drugstone/management/commands/make_graphs.py
@@ -80,7 +80,7 @@ def _internal_ppis(dataset_name: str) -> List[dict]:
 def create_gt(params: Tuple[str, str]) -> None:
     """Fetches all required information to build a graph-tools file for given
     PPI and PDI dataset names (params). Builds the graph-tools file and saves it in 
-    the data_drugstone/Networks folder.
+    the data/Networks folder.
 
     Args:
         params (Tuple[str, str]): Protein-protein-dataset name, Protein-drug-dataset name
@@ -176,7 +176,7 @@ def create_gt(params: Tuple[str, str]) -> None:
     print("done with drug edges")
 
     # save graph
-    filename = f"./data_drugstone/Networks/internal_{ppi_dataset}_{pdi_dataset}.gt"
+    filename = f"./data/Networks/internal_{ppi_dataset}_{pdi_dataset}.gt"
     g.save(filename)
     print(f"Created file {filename}")
     return
diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py
index 00f3d7c..66d878f 100755
--- a/drugstone/management/commands/populate_db.py
+++ b/drugstone/management/commands/populate_db.py
@@ -1,28 +1,20 @@
 from django.core.management.base import BaseCommand
-import pandas as pd
-from django.db import OperationalError, IntegrityError
+from django.db import OperationalError
 
-from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset, PDIDataset, Disorder, PDisDataset, DrDiDataset
-from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
+from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset, PDIDataset, Disorder, PDisDataset, \
+    DrDiDataset, EnsemblGene
+from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction, ProteinDisorderAssociation, \
+    DrugDisorderIndication
 
 from drugstone.management.includes.DataPopulator import DataPopulator
-from .import_from_nedrex import nedrex_importer
+from .import_from_nedrex import NedrexImporter
+from drugstone.management.includes.NodeCache import NodeCache
+from drugstone.management.includes import DatasetLoader
 
 
 class DatabasePopulator:
-    def __init__(self, data_dir,
-                #  protein_file,
-                #  drug_file,
-                #  protein_protein_interaction_file,
-                #  protein_drug_interaction_file,
-                 tissue_expression_file,
-                 ):
+    def __init__(self, data_dir):
         self.data_dir = data_dir
-        # self.protein_file = protein_file
-        # self.drug_file = drug_file
-        # self.ppi_file = protein_protein_interaction_file
-        # self.pdi_file = protein_drug_interaction_file
-        self.exp_file = tissue_expression_file
 
     def delete_model(self, model):
         from django.db import connection
@@ -32,31 +24,37 @@ class DatabasePopulator:
         except OperationalError:
             cursor.execute('DELETE FROM "{0}"'.format(model._meta.db_table))
 
+    def delete_all(self):
+        models = ['PPI', 'PDI', 'DrDi', 'Protein', 'Drug', 'Disorder', 'PDi', 'Expression', 'Tissue']
+        self.delete_models(models)
+
     def delete_models(self, model_list):
         for model_name in model_list:
             print(f'Deleting {model_name} model ...')
 
             if model_name == 'PPI':
+                self.delete_model(PPIDataset)
                 self.delete_model(ProteinProteinInteraction)
             elif model_name == 'PDI':
+                self.delete_model(PDIDataset)
                 self.delete_model(ProteinDrugInteraction)
+            elif model_name == 'DrDi':
+                self.delete_model(DrDiDataset)
+                self.delete_model(DrugDisorderIndication)
             elif model_name == 'Protein':
                 self.delete_model(Protein)
+                self.delete_model(EnsemblGene)
             elif model_name == 'Drug':
                 self.delete_model(Drug)
             elif model_name == 'Disorder':
                 self.delete_model(Disorder)
-            elif model_name == 'PDiAssociations':
+            elif model_name == 'PDi':
                 self.delete_model(PDisDataset)
+                self.delete_model(ProteinDisorderAssociation)
+            elif model_name == 'Expression':
+                self.delete_model(ExpressionLevel)
             elif model_name == 'Tissue':
                 self.delete_model(Tissue)
-            elif model_name == 'PPIDataset':
-                self.delete_model(PPIDataset)
-            elif model_name == 'PDIDataset':
-                self.delete_model(PDIDataset)
-            elif model_name == 'DrDiDataset':
-                self.delete_model(DrDiDataset)
-
 
 
 class Command(BaseCommand):
@@ -65,104 +63,126 @@ class Command(BaseCommand):
         # dataset directory
         parser.add_argument('-dd', '--data_dir', type=str, help='Dataset directory path')
         parser.add_argument('-dm', '--delete_model', type=str, help='Delete model(s)')
+        parser.add_argument('-c', '--clear', action='store_true', help='Delete all models')
+        parser.add_argument('-a', '--all', action='store_true', help='Populate all tables')
+        parser.add_argument('-u', '--update', action='store_true', help='Execute database update for selected tables')
 
         parser.add_argument('-p', '--proteins', action='store_true', help='Populate Proteins')
         parser.add_argument('-di', '--disorders', action='store_true', help='Populate Disorders')
         parser.add_argument('-dr', '--drugs', action='store_true', help='Drug file name')
 
-        parser.add_argument('-exp', '--exp_file', type=str, help='Tissue expression file (.gct without first 2 lines)')
+        parser.add_argument('-exp', '--exp', action='store_true',
+                            help='Populate Tissue Expression levels (.gct without first 2 lines)')
 
-        parser.add_argument('-pp', '--protein_protein', type=str, help='Populate Protein-Protein Interactions')
-        parser.add_argument('-pdr', '--protein_drug', type=str, help='Populate Protein-Drug Interactions')
-        parser.add_argument('-pdi', '--protein_disorder', type=str, help='Populate Protein-Disorder Associations')
-        parser.add_argument('-ddi', '--drug_disorder', type=str, help='Populate Drug-Disorder Indications')
+        parser.add_argument('-pp', '--protein_protein', action='store_true',
+                            help='Populate Protein-Protein Interactions')
+        parser.add_argument('-pdr', '--protein_drug', action='store_true', help='Populate Protein-Drug Interactions')
+        parser.add_argument('-pdi', '--protein_disorder', action='store_true',
+                            help='Populate Protein-Disorder Associations')
+        parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
 
     def handle(self, *args, **kwargs):
-
+        nedrex_api_url = "http://82.148.225.92:8123/"
         data_dir = kwargs['data_dir']
-        exp_file = kwargs['exp_file']
-
-        # p = kwargs['proteins']
-        # pp = kwargs['protein_protein']
-        # pd = kwargs['protein_drug']
 
+        db_populator = DatabasePopulator(data_dir=data_dir)
 
-        db_populator = DatabasePopulator(data_dir=data_dir,
-                                        # protein_file=protein_file,
-                                        # drug_file=drug_file,
-                                        # protein_protein_interaction_file=ppi_file,
-                                        # protein_drug_interaction_file=pdi_file,
-                                        tissue_expression_file=exp_file,
-                                        )
-
-        importer = nedrex_importer("http://82.148.225.92:8123/")
+        if kwargs['clear']:
+            db_populator.delete_all()
 
         if kwargs['delete_model'] is not None:
             model_list = kwargs['delete_model'].split(',')
             db_populator.delete_models(model_list)
-            return
 
-        populator = DataPopulator()
+        cache = NodeCache()
+        update = bool(kwargs['update'])
+        importer = NedrexImporter(nedrex_api_url, cache)
+        populator = DataPopulator(cache)
+
+        if kwargs['all']:
+            kwargs['drugs'] = True
+            kwargs['disorders'] = True
+            kwargs['proteins'] = True
+            kwargs['exp'] = True
+            kwargs['protein_protein'] = True
+            kwargs['protein_drug'] = True
+            kwargs['protein_disorder'] = True
+            kwargs['drug_disorder'] = True
 
         if kwargs['drugs']:
             print('Populating Drugs...')
-            # n = DataPopulator.populate_drugs(populator)
-            n = nedrex_importer.import_drugs(importer,False)
+            n = NedrexImporter.import_drugs(importer, update)
             print(f'Populated {n} Drugs.')
 
-
-        if kwargs['exp_file'] is not None:
-            print('Populating Expressions...')
-            n = DataPopulator.populate_expessions(populator)
-            print(f'Populated {n} Expressions.')
+        if kwargs['disorders']:
+            print('Populating Disorders...')
+            n = NedrexImporter.import_disorders(importer, update)
+            print(f'Populated {n} Disorders.')
 
         if kwargs['proteins']:
             print('Populating Proteins...')
-
-            n = nedrex_importer.import_proteins(importer, False)
-            # n = DataPopulator.populate_proteins(populator)
+            n = NedrexImporter.import_proteins(importer, update)
             print(f'Populated {n} Proteins.')
-            
-            # print('Populating ENSG IDs...')
-            # n = DataPopulator.populate_ensg(populator)
-            # print(f'Populated {n} ENSG IDs.')
+            print('Populating ENSG IDs...')
+            n = DataPopulator.populate_ensg(populator, update)
+            print(f'Populated {n} ENSG IDs.')
 
-        if kwargs['disorders']:
-            print('Populating Disorders...')
-            n = nedrex_importer.import_disorders(importer, False)
-            # n = DataPopulator.populate_disorders(populator)
-            print(f'Populated {n} Disorders.')
+        if kwargs['exp']:
+            print('Populating Expressions...')
+            n = DataPopulator.populate_expressions(populator, update)
+            print(f'Populated {n} Expressions.')
 
-        if kwargs['protein_protein'] is not None:
+        if kwargs['protein_protein']:
+            print('Importing PPIs from NeDRexDB...')
+            n = NedrexImporter.import_protein_protein_interactions(importer,
+                                                               DatasetLoader.get_ppi_nedrex(nedrex_api_url),
+                                                               update)
+            print(f'Imported {n} PPIs from NeDRexDB')
             print('Populating PPIs from STRING...')
-            n = DataPopulator.populate_ppi_string(populator)
+            n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
             print(f'Populated {n} PPIs from STRING.')
 
             print('Populating PPIs from APID...')
-            n = DataPopulator.populate_ppi_apid(populator)
+            n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
             print(f'Populated {n} PPIs from APID.')
 
             print('Populating PPIs from BioGRID...')
-            n = DataPopulator.populate_ppi_biogrid(populator)
+            n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
             print(f'Populated {n} PPIs from BioGRID.')
 
-        if kwargs['protein_drug'] is not None:
+        if kwargs['protein_drug']:
+            print('Importing PDIs from NeDRexDB...')
+            n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
+            print(f'Imported {n} PDIs from NeDRexDB')
+
             print('Populating PDIs from Chembl...')
-            n = DataPopulator.populate_pdi_chembl(populator)
+            n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
             print(f'Populated {n} PDIs from Chembl.')
 
             print('Populating PDIs from DGIdb...')
-            n = DataPopulator.populate_pdi_dgidb(populator)
+            n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
             print(f'Populated {n} PDIs from DGIdb.')
 
             print('Populating PDIs from DrugBank...')
-            n = DataPopulator.populate_pdi_drugbank(populator)
+            n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
             print(f'Populated {n} PDIs from DrugBank.')
-        if kwargs['protein_disorder'] is not None:
+
+        if kwargs['protein_disorder']:
+            print('Importing PDis from NeDRexDB...')
+            n = NedrexImporter.import_protein_disorder_associations(importer,
+                                                               DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
+                                                               update)
+            print(f'Imported {n} PDis from NeDRexDB')
             print('Populating PDis associations from DisGeNET...')
-            n=DataPopulator.populate_pdis_disgenet(populator)
+            n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
             print(f'Populated {n} PDis associations from DisGeNET.')
-        if kwargs['drug_disorder'] is not None:
+
+        if kwargs['drug_disorder']:
+            print('Importing DrDis from NeDRexDB...')
+            n = NedrexImporter.import_drug_disorder_indications(importer,
+                                                               DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
+                                                               update)
+            print(f'Imported {n} DrDis from NeDRexDB')
             print('Populating DrDi indications from DrugBank...')
-            n=DataPopulator.populate_drdis_drugbank(populator)
+            n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
             print(f'Populated {n} DrDi associations from DrugBank.')
diff --git a/drugstone/management/commands/test.py b/drugstone/management/commands/test.py
index 5afbea2..4d01dd4 100644
--- a/drugstone/management/commands/test.py
+++ b/drugstone/management/commands/test.py
@@ -1,5 +1,6 @@
 import python_nedrex as nedrex
 from python_nedrex.core import get_nodes, get_edges, get_api_key
+from python_nedrex.static import get_metadata
 
 def iter_node_collection(coll_name, eval):
     offset = 0
@@ -25,9 +26,26 @@ def iter_edge_collection(coll_name, eval):
         offset += limit
 
 
+def iter_ppi(eval):
+    from python_nedrex import ppi
+    offset = 0
+    limit = 1000
+    while True:
+        result = ppi.ppis({"exp"}, skip=offset, limit=limit)
+        if not result:
+            return
+        for edge in result:
+            eval(edge)
+        offset += limit
+
 base_url = "http://82.148.225.92:8123/"
 nedrex.config.set_url_base(base_url)
 api_key = get_api_key(accept_eula=True)
 nedrex.config.set_api_key(api_key)
+print(f'Nodes: {nedrex.core.get_node_types()}')
+print(f'Edges: {nedrex.core.get_edge_types()}')
+print(f'{get_metadata()}')
+
 
-iter_edge_collection("gene_expressed_in_tissue", lambda node: {print(node)})
\ No newline at end of file
+iter_ppi(lambda node: print(node))
+# iter_edge_collection("gene_expressed_in_tissue", lambda node: {print(node)})
\ No newline at end of file
diff --git a/drugstone/management/includes/DataLoader.py b/drugstone/management/includes/DataLoader.py
index 8fa0d2a..ca31b56 100755
--- a/drugstone/management/includes/DataLoader.py
+++ b/drugstone/management/includes/DataLoader.py
@@ -3,14 +3,14 @@ import json
 
 
 class DataLoader:
-    PATH_PROTEINS = 'data_drugstone/Proteins/'
-    PATH_DRUGS = 'data_drugstone/Drugs/'
-    PATH_EXPR = 'data_drugstone/'
-    PATH_DISORDERS = 'data_drugstone/Disorders/'
-    PATH_PDI = 'data_drugstone/PDI/'
-    PATH_PPI = 'data_drugstone/PPI/'
-    PATH_PDi = 'data_drugstone/PDi/'
-    PATH_DDi = 'data_drugstone/DrDi/'
+    PATH_PROTEINS = 'data/Proteins/'
+    PATH_DRUGS = 'data/Drugs/'
+    PATH_EXPR = 'data/Expression/'
+    PATH_DISORDERS = 'data/Disorders/'
+    PATH_PDI = 'data/PDI/'
+    PATH_PPI = 'data/PPI/'
+    PATH_PDi = 'data/PDi/'
+    PATH_DDi = 'data/DrDi/'
 
     # Proteins
     PROTEINS_COVEX = 'protein_list.csv'
@@ -230,7 +230,7 @@ class DataLoader:
         Returns:
             pd.DataFrame: columns "protein_name", "disorder_name" and "score"
         """
-        return pd.read_csv(f'{DataLoader.PATH_PDi}{DataLoader.PDi_DISGENET}', sep='\t')
+        return pd.read_csv(f'{DataLoader.PATH_PDi}{DataLoader.PDi_DISGENET}', sep='\t', dtype={'disorder_name': str, 'protein_name': str, 'score': float})
 
     @staticmethod
     def load_drdis_drugbank() -> pd.DataFrame:
@@ -239,7 +239,7 @@ class DataLoader:
         Returns:
             pd.DataFrame: columns "drugbank_id" and "mondo_id"
         """
-        return pd.read_csv(f'{DataLoader.PATH_DDi}{DataLoader.DDi_DRUGBANK}', sep='\t')
+        return pd.read_csv(f'{DataLoader.PATH_DDi}{DataLoader.DDi_DRUGBANK}', sep='\t', dtype={'drugbank_id': str, 'mondo_id': str})
 
     @staticmethod
     def load_pdi_dgidb() -> pd.DataFrame:
diff --git a/drugstone/management/includes/DataPopulator.py b/drugstone/management/includes/DataPopulator.py
index 4056232..5858202 100755
--- a/drugstone/management/includes/DataPopulator.py
+++ b/drugstone/management/includes/DataPopulator.py
@@ -1,88 +1,18 @@
-from collections import defaultdict
-
 from drugstone.management.includes.DataLoader import DataLoader
 import drugstone.models as models
+from drugstone.management.includes.NodeCache import NodeCache
 
 
 class DataPopulator:
-    proteins = dict()
-    uniprot_to_ensembl = dict()
-    gene_name_to_ensembl = defaultdict(lambda: set())
-    disorders = dict()
-    drugs = dict()
-
-    def init_proteins(self):
-        if len(self.proteins) == 0:
-            print("Generating protein maps...")
-            for protein in models.Protein.objects.all():
-                self.proteins[protein.entrez]=protein
-                self.uniprot_to_ensembl[protein.uniprot_code] = protein.entrez
-                self.gene_name_to_ensembl[protein.gene].add(protein.entrez)
-
-    def init_drugs(self):
-        if len(self.drugs)== 0:
-            print("Generating drug map...")
-            for drug in models.Drug.objects.all():
-                self.drugs[drug.drug_id]=drug
-
-    def init_disorders(self):
-        if len(self.disorders) == 0:
-            print("Generating disorder map...")
-            for disorder in models.Disorder.objects.all():
-                self.disorders[disorder.mondo_id]=disorder
-
-    # def populate_proteins(self) -> int:
-    #     """ Populates the Protein table in the django database.
-    #     Handles loading the data and passing it to the django database
-    #
-    #     Returns:
-    #         int: Count of how many proteins were added
-    #     """
-    #     df = DataLoader.load_proteins()
-    #     for _, row in df.iterrows():
-    #         self.proteins[row['entrez_id']] = models.Protein(
-    #             uniprot_code=row['protein_ac'],
-    #             gene=row['gene_name'],
-    #             entrez=row['entrez_id'],
-    #             protein_name=row['protein_name'])
-    #         self.uniprot_to_ensembl[row['protein_ac']] = row['entrez_id']
-    #         self.gene_name_to_ensembl[row['gene_name']].add(row['entrez_id'])
-    #
-    #     models.Protein.objects.bulk_create(self.proteins.values())
-    #     return len(self.proteins)
-    #
-    # def populate_disorders(self) -> int:
-    #     """ Populates the Disorder table in the django database.
-    #     Handles loading the data and passing it to the django database
-    #
-    #     Returns:
-    #         int: Count of how many disorders were added
-    #     """
-    #     df = DataLoader.load_disorders()
-    #     for _, row in df.iterrows():
-    #         self.disorders[row['mondo_id']] = models.Disorder(
-    #             mondo_id=row['mondo_id'],
-    #             label=row['label'],
-    #             icd10=row['icd10']
-    #         )
-    #     models.Disorder.objects.bulk_create(self.disorders.values())
-    #     return len(self.disorders)
-    #
-    # def populate_drugs(self):
-    #     df = DataLoader.load_drugs()
-    #     for _, row in df.iterrows():
-    #         drug_id = row['drug_id']
-    #         drug_name = row['drug_name']
-    #         drug_status = row['drug_status']
-    #         self.drugs[drug_id] = models.Drug(
-    #             drug_id=drug_id,
-    #             name=drug_name,
-    #             status=drug_status)
-    #     models.Drug.objects.bulk_create(self.drugs.values())
-    #     return len(self.drugs)
-
-    def populate_expessions(self):
-        self.init_proteins()
+
+    def __init__(self, cache: NodeCache):
+        self.cache = cache
+
+    def populate_expressions(self, update):
+        if update:
+            models.ExpressionLevel.objects.all().delete()
+
+        self.cache.init_proteins()
         df = DataLoader.load_expressions()
 
         tissues_models = dict()
@@ -94,28 +24,34 @@ class DataPopulator:
             tissues_models[tissue_name] = tissue_model
 
         proteins_linked = 0
-        unique = set()
-        bulk = list()
+        bulk = set()
+        uniq = set()
 
+        size = 0
         for _, row in df.iterrows():
             gene_name = row['Description']
 
-            for protein_id in self.gene_name_to_ensembl[gene_name]:
-                protein_model = self.proteins[protein_id]
+            for protein_model in self.cache.get_proteins_by_gene(gene_name):
                 proteins_linked += 1
 
                 for tissue_name, tissue_model in tissues_models.items():
-                    id = f"{tissue_name}_{protein_id}"
-                    if id in unique:
+                    expr = models.ExpressionLevel(protein=protein_model,
+                                                  tissue=tissue_model,
+                                                  expression_level=row[tissue_name])
+                    id = hash(expr)
+                    if id in uniq:
                         continue
-                    unique.add(id)
-                    bulk.append(models.ExpressionLevel(protein=protein_model,
-                                                       tissue=tissue_model,
-                                                       expression_level=row[tissue_name]))
+                    uniq.add(id)
+                    bulk.add(expr)
+            if len(bulk) > 100000:
+                models.ExpressionLevel.objects.bulk_create(bulk)
+                size += len(bulk)
+                bulk = set()
+
         models.ExpressionLevel.objects.bulk_create(bulk)
-        return len(bulk)
+        return size + len(bulk)
 
-    def populate_ensg(self) -> int:
+    def populate_ensg(self, update) -> int:
         """ Populates the Ensembl-Gene table in the django database.
         Also maps the added ensg entries to the corresponding proteins.
         Handles loading the data and passing it to the django database
@@ -123,76 +59,78 @@ class DataPopulator:
         Returns:
             int: Count of how many ensg-protein relations were added
         """
-        self.init_proteins()
+        if update:
+            models.EnsemblGene.objects.all().delete()
+        self.cache.init_proteins()
         data = DataLoader.load_ensg()
         bulk = list()
+
         for entrez, ensg_list in data.items():
-            protein = self.proteins[entrez]
-            for ensg in ensg_list:
-                bulk.append(models.EnsemblGene(name=ensg, protein=protein))
+            proteins = self.cache.get_proteins_by_entrez(entrez)
+            for protein in proteins:
+                for ensg in ensg_list:
+                    bulk.append(models.EnsemblGene(name=ensg, protein=protein))
         models.EnsemblGene.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_ppi_string(self) -> int:
+    def populate_ppi_string(self, dataset, update) -> int:
         """ Populates the Protein-Protein-Interactions from STRINGdb
         Handles loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
+        self.cache.init_proteins()
+        if update:
+            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
+
         df = DataLoader.load_ppi_string()
-        dataset, _ = models.PPIDataset.objects.get_or_create(
-            name='STRING',
-            link='https://string-db.org/',
-            version='11.0'
-        )
         bulk = list()
         for _, row in df.iterrows():
             try:
                 # try fetching proteins
-                protein_a = self.proteins[row['entrez_a']]
-                protein_b = self.proteins[row['entrez_b']]
+                proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
+                proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
             except KeyError:
                 # continue if not found
                 continue
-            try:
-                bulk.append(models.ProteinProteinInteraction(
-                    ppi_dataset=dataset,
-                    from_protein=protein_a,
-                    to_protein=protein_b
-                ))
-            except models.ValidationError:
-                # duplicate
-                continue
+            for protein_a in proteins_a:
+                for protein_b in proteins_b:
+                    try:
+                        bulk.append(models.ProteinProteinInteraction(
+                            ppi_dataset=dataset,
+                            from_protein=protein_a,
+                            to_protein=protein_b
+                        ))
+                    except models.ValidationError:
+                        # duplicate
+                        continue
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_ppi_apid(self) -> int:
+    def populate_ppi_apid(self, dataset, update) -> int:
         """ Populates the Protein-Protein-Interactions from Apid
         Handles loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
+        self.cache.init_proteins()
+
+        if update:
+            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
         df = DataLoader.load_ppi_apid()
-        dataset, _ = models.PPIDataset.objects.get_or_create(
-            name='APID',
-            link='http://cicblade.dep.usal.es:8080/APID/',
-            version='January 2019'
-        )
-        bulk = list()
+        bulk = set()
         for _, row in df.iterrows():
             try:
                 # try fetching proteins
-                protein_a = self.proteins[self.uniprot_to_ensembl[row['from_protein_ac']]]
-                protein_b = self.proteins[self.uniprot_to_ensembl[row['to_protein_ac']]]
+                protein_a = self.cache.get_protein_by_uniprot(row['from_protein_ac'])
+                protein_b = self.cache.get_protein_by_uniprot(row['to_protein_ac'])
             except KeyError:
                 # continue if not found
                 continue
             try:
-                bulk.append(models.ProteinProteinInteraction(
+                bulk.add(models.ProteinProteinInteraction(
                     ppi_dataset=dataset,
                     from_protein=protein_a,
                     to_protein=protein_b
@@ -202,71 +140,69 @@ class DataPopulator:
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_ppi_biogrid(self) -> int:
+    def populate_ppi_biogrid(self, dataset, update) -> int:
         """ Populates the Protein-Protein-Interactions from BioGRID
         Handles loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
+        self.cache.init_proteins()
+
+        if update:
+            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
         df = DataLoader.load_ppi_biogrid()
-        dataset, _ = models.PPIDataset.objects.get_or_create(
-            name='BioGRID',
-            link='https://thebiogrid.org/',
-            version='4.0'
-        )
         bulk = list()
         for _, row in df.iterrows():
             try:
                 # try fetching proteins
-                protein_a = self.proteins[row['entrez_a']]
-                protein_b = self.proteins[row['entrez_b']]
+                proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
+                proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
             except KeyError:
                 # TODO update error
                 # continue if not found
                 continue
-            try:
-                bulk.append(models.ProteinProteinInteraction(
-                    ppi_dataset=dataset,
-                    from_protein=protein_a,
-                    to_protein=protein_b
-                ))
-            except models.ValidationError:
-                # duplicate
-                continue
+            for protein_a in proteins_a:
+                for protein_b in proteins_b:
+                    try:
+                        bulk.append(models.ProteinProteinInteraction(
+                            ppi_dataset=dataset,
+                            from_protein=protein_a,
+                            to_protein=protein_b
+                        ))
+                    except models.ValidationError:
+                        # duplicate
+                        continue
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_pdi_chembl(self) -> int:
+    def populate_pdi_chembl(self, dataset, update) -> int:
         """ Populates the Protein-Drug-Interactions from Chembl
         Handles Loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
-        self.init_drugs()
+        self.cache.init_proteins()
+        self.cache.init_drugs()
+
+        if update:
+            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
         df = DataLoader.load_pdi_chembl()
-        dataset, _ = models.PDIDataset.objects.get_or_create(
-            name='ChEMBL',
-            link='https://www.ebi.ac.uk/chembl/',
-            version='27',
-        )
-        bulk = list()
+        bulk = set()
         for _, row in df.iterrows():
             try:
-                protein = self.proteins[self.uniprot_to_ensembl[row['protein_ac']]]
+                protein = self.cache.get_protein_by_uniprot(row['protein_ac'])
             except KeyError:
                 # continue if not found
                 continue
             try:
                 # try fetching drug
-                drug = self.drugs[row['drug_id']]
+                drug = self.cache.get_drug_by_drugbank(row['drug_id'])
             except KeyError:
                 # continue if not found
                 continue
-            bulk.append(models.ProteinDrugInteraction(
+            bulk.add(models.ProteinDrugInteraction(
                 pdi_dataset=dataset,
                 protein=protein,
                 drug=drug
@@ -274,36 +210,35 @@ class DataPopulator:
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_pdis_disgenet(self,) -> int:
+    def populate_pdis_disgenet(self, dataset, update) -> int:
         """ Populates the Protein-Disorder-Interactions from DisGeNET
         Handles Loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
-        self.init_disorders()
+        self.cache.init_proteins()
+        self.cache.init_disorders()
+
+        if update:
+            models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
         df = DataLoader.load_pdis_disgenet()
-        dataset, _ = models.PDisDataset.objects.get_or_create(
-            name='DisGeNET',
-            link='https://www.disgenet.org/home/',
-            version='6.0',
-        )
-        bulk = list()
+        bulk = set()
         for _, row in df.iterrows():
             try:
                 # try fetching protein
-                protein = self.proteins[self.uniprot_to_ensembl[row['protein_name']]]
+                protein = self.cache.get_protein_by_uniprot(row['protein_name'])
             except KeyError:
                 # continue if not found
                 continue
             try:
-                # try fetching drug
-                disorder = self.disorders[str(int(row['disorder_name']))]
+                # try fetching disorder
+                disorder = self.cache.get_disorder_by_mondo(row['disorder_name'])
             except KeyError:
                 # continue if not found
                 continue
-            bulk.append(models.ProteinDisorderAssociation(
+            bulk.add(models.ProteinDisorderAssociation(
                 pdis_dataset=dataset,
                 protein=protein,
                 disorder=disorder,
@@ -312,36 +247,34 @@ class DataPopulator:
         models.ProteinDisorderAssociation.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_drdis_drugbank(self) -> int:
+    def populate_drdis_drugbank(self, dataset, update) -> int:
         """ Populates the Drug-Disorder-Indications from DrugBank
         Handles Loading the data and passing it to the django database
 
         Returns:
             int: Count of how many edges were added
         """
-        self.init_drugs()
-        self.init_disorders()
+        self.cache.init_drugs()
+        self.cache.init_disorders()
+        if update:
+            models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
+
         df = DataLoader.load_drdis_drugbank()
-        dataset, _ = models.DrDiDataset.objects.get_or_create(
-            name='DrugBank',
-            link='https://go.drugbank.com/',
-            version='5.1.8',
-        )
-        bulk = list()
+        bulk = set()
         for _, row in df.iterrows():
             try:
-                # try fetching protein
-                drug = self.drugs[row['drugbank_id']]
+                # try fetching drug
+                drug = self.cache.get_drug_by_drugbank(row['drugbank_id'])
             except KeyError:
                 # continue if not found
                 continue
             try:
-                # try fetching drug
-                disorder = self.disorders[str(int(row['mondo_id']))]
+                # try fetching disorder
+                disorder = self.cache.get_disorder_by_mondo(row['mondo_id'])
             except KeyError:
                 # continue if not found
                 continue
-            bulk.append(models.DrugDisorderIndication(
+            bulk.add(models.DrugDisorderIndication(
                 drdi_dataset=dataset,
                 drug=drug,
                 disorder=disorder,
@@ -349,76 +282,75 @@ class DataPopulator:
         models.DrugDisorderIndication.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_pdi_dgidb(self) -> int:
+    def populate_pdi_dgidb(self, dataset, update) -> int:
         """ Populates the Protein-Drug-Interactions from DGIdb
         Handles Loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
-        self.init_drugs()
+        self.cache.init_proteins()
+        self.cache.init_drugs()
+
+        if update:
+            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
         df = DataLoader.load_pdi_dgidb()
-        dataset, _ = models.PDIDataset.objects.get_or_create(
-            name='DGIdb',
-            link='https://www.dgidb.org/',
-            version='4.2.0'
-        )
-        bulk = list()
+        bulk = set()
         for _, row in df.iterrows():
             try:
                 # try fetching protein
-                protein = self.proteins[row['entrez_id']]
+                proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
             except KeyError:
                 # continue if not found
                 continue
             try:
                 # try fetching drug
-                drug = self.drugs[row['drug_id']]
+                drug = self.cache.get_drug_by_drugbank(row['drug_id'])
             except KeyError:
                 # continue if not found
                 continue
-            bulk.append(models.ProteinDrugInteraction(
-                pdi_dataset=dataset,
-                protein=protein,
-                drug=drug
-            ))
+            for protein in proteins:
+                bulk.add(models.ProteinDrugInteraction(
+                    pdi_dataset=dataset,
+                    protein=protein,
+                    drug=drug
+                ))
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
-    def populate_pdi_drugbank(self) -> int:
+    def populate_pdi_drugbank(self, dataset, update) -> int:
         """ Populates the Protein-Drug-Interactions from Drugbank
         Handles Loading the data and passing it to the django database
 
         Returns:
             int: Count of how many interactions were added
         """
-        self.init_proteins()
-        self.init_drugs()
+        self.cache.init_proteins()
+        self.cache.init_drugs()
+
+        if update:
+            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
         df = DataLoader.load_pdi_drugbank()
-        dataset, _ = models.PDIDataset.objects.get_or_create(
-            name='DrugBank',
-            link='https://go.drugbank.com/',
-            version='5.1.7'
-        )
-        bulk = list()
+        bulk = set()
         for _, row in df.iterrows():
             try:
                 # try fetching protein
-                protein = self.proteins[row['entrez_id']]
+                proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
             except KeyError:
                 # continue if not found
                 continue
             try:
                 # try fetching drug
-                drug = self.drugs[row['drug_id']]
+                drug = self.cache.get_drug_by_drugbank(row['drug_id'])
             except KeyError:
                 # continue if not found
                 continue
-            bulk.append(models.ProteinDrugInteraction(
-                pdi_dataset=dataset,
-                protein=protein,
-                drug=drug
-            ))
+            for protein in proteins:
+                bulk.add(models.ProteinDrugInteraction(
+                    pdi_dataset=dataset,
+                    protein=protein,
+                    drug=drug
+                ))
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
         return len(bulk)
diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py
new file mode 100644
index 0000000..f608ed1
--- /dev/null
+++ b/drugstone/management/includes/DatasetLoader.py
@@ -0,0 +1,99 @@
+from drugstone import models
+from python_nedrex.static import get_metadata
+
+def get_ppi_string():
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='STRING',
+        link='https://string-db.org/',
+        version='11.0'
+    )
+    return dataset
+
+def get_ppi_apid():
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='APID',
+        link='http://cicblade.dep.usal.es:8080/APID/',
+        version='January 2019'
+    )
+    return dataset
+
+def get_ppi_biogrid():
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='BioGRID',
+        link='https://thebiogrid.org/',
+        version='4.0'
+    )
+    return dataset
+
+def get_drug_target_nedrex(url):
+    dataset, _ = models.PDIDataset.objects.get_or_create(
+        name='NeDRex',
+        link=url,
+        version=get_metadata()['version'],
+    )
+    return dataset
+
+def get_ppi_nedrex(url):
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='NeDRex',
+        link=url,
+        version=get_metadata()['version'],
+    )
+    return dataset
+
+def get_protein_disorder_nedrex(url):
+    dataset, _ = models.PDisDataset.objects.get_or_create(
+        name='NeDRex',
+        link=url,
+        version=get_metadata()['version'],
+    )
+    return dataset
+
+def get_drug_disorder_nedrex(url):
+    dataset, _ = models.DrDiDataset.objects.get_or_create(
+        name='NeDRex',
+        link=url,
+        version=get_metadata()['version'],
+    )
+    return dataset
+
+def get_drug_target_chembl():
+    dataset, _ = models.PDIDataset.objects.get_or_create(
+        name='ChEMBL',
+        link='https://www.ebi.ac.uk/chembl/',
+        version='27',
+    )
+    return dataset
+
+def get_drug_target_dgidb():
+    dataset, _ = models.PDIDataset.objects.get_or_create(
+        name='DGIdb',
+        link='https://www.dgidb.org/',
+        version='4.2.0'
+    )
+    return dataset
+
+def get_drug_target_drugbank():
+    dataset, _ = models.PDIDataset.objects.get_or_create(
+        name='DrugBank',
+        link='https://go.drugbank.com/',
+        version='5.1.7'
+    )
+    return dataset
+
+def get_disorder_protein_disgenet():
+    dataset, _ = models.PDisDataset.objects.get_or_create(
+        name='DisGeNET',
+        link='https://www.disgenet.org/home/',
+        version='6.0',
+    )
+    return dataset
+
+
+def get_drug_disorder_drugbank():
+    dataset, _ = models.DrDiDataset.objects.get_or_create(
+        name='DrugBank',
+        link='https://go.drugbank.com/',
+        version='5.1.8',
+    )
+    return dataset
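
These helpers centralise the get_or_create calls that were previously scattered across the populate_* methods, so every importer resolves the same dataset row. Given the ('name', 'version') uniqueness constraint on the dataset models, repeated calls are idempotent; for the NeDRex variants the version is read from the live API metadata at call time. A small sketch of the idempotency, assuming a configured database:

```python
# Sketch only: the second call reuses the existing row instead of inserting a new one.
from drugstone.management.includes import DatasetLoader

first = DatasetLoader.get_ppi_string()
second = DatasetLoader.get_ppi_string()
assert first.pk == second.pk
```
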
diff --git a/drugstone/management/includes/NodeCache.py b/drugstone/management/includes/NodeCache.py
new file mode 100644
index 0000000..7f9491c
--- /dev/null
+++ b/drugstone/management/includes/NodeCache.py
@@ -0,0 +1,60 @@
+from collections import defaultdict
+import drugstone.models as models
+
+
+class NodeCache:
+    # Class-level caches: these maps are shared across all NodeCache instances.
+    proteins = dict()
+    entrez_to_uniprot = defaultdict(set)
+    gene_name_to_uniprot = defaultdict(set)
+    disorders = dict()
+    drugs = dict()
+
+    def init_protein_maps(self):
+        print("Generating protein id maps...")
+        for protein in self.proteins.values():
+            self.entrez_to_uniprot[protein.entrez].add(protein.uniprot_code)
+            self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
+
+    def init_proteins(self):
+        if len(self.proteins) == 0:
+            print("Generating protein maps...")
+            for protein in models.Protein.objects.all():
+                self.proteins[protein.uniprot_code] = protein
+        if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
+            self.init_protein_maps()
+
+
+    def init_drugs(self):
+        if len(self.drugs) == 0:
+            print("Generating drug map...")
+            for drug in models.Drug.objects.all():
+                self.drugs[drug.drug_id] = drug
+
+    def init_disorders(self):
+        if len(self.disorders) == 0:
+            print("Generating disorder map...")
+            for disorder in models.Disorder.objects.all():
+                self.disorders[disorder.mondo_id] = disorder
+
+
+    def get_protein_by_uniprot(self, uniprot_id):
+        return self.proteins[uniprot_id]
+
+    def get_proteins_by_entrez(self, entrez_id):
+        out = list()
+        for g in self.entrez_to_uniprot[entrez_id]:
+            out.append(self.proteins[g])
+        return out
+
+    def get_proteins_by_gene(self, gene_name):
+        out = list()
+        for g in self.gene_name_to_uniprot[gene_name]:
+            out.append(self.proteins[g])
+        return out
+
+    def get_drug_by_drugbank(self, drugbank_id):
+        return self.drugs[drugbank_id]
+
+    def get_disorder_by_mondo(self, mondo_id):
+        return self.disorders[mondo_id]
\ No newline at end of file
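
NodeCache loads each node table once and answers all later lookups from memory. Note the asymmetry in failure behaviour: the UniProt, DrugBank and MONDO lookups are plain dict accesses and raise KeyError for unknown identifiers (which the populators above catch and skip), while the Entrez and gene-name lookups go through defaultdicts and silently return an empty list. A short behaviour sketch, assuming Django is configured and the Protein table is populated; the identifiers are illustrative only:

```python
from drugstone.management.includes.NodeCache import NodeCache

cache = NodeCache()
cache.init_proteins()

hits = cache.get_proteins_by_entrez('7157')   # [] if the Entrez id is unknown, no exception
try:
    cache.get_protein_by_uniprot('P04637')
except KeyError:
    pass  # unknown UniProt accession: the populators treat this as "skip the row"
```
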
diff --git a/drugstone/models.py b/drugstone/models.py
index 9299122..4f160db 100755
--- a/drugstone/models.py
+++ b/drugstone/models.py
@@ -5,13 +5,6 @@ from django.db import models
 # Main biological and medical entities
 
 
-class Tissue(models.Model):
-    name = models.CharField(max_length=128, default='', unique=True)
-
-    def __str__(self):
-        return self.name
-
-
 class PPIDataset(models.Model):
     name = models.CharField(max_length=128, default='', unique=False)
     link = models.CharField(max_length=128, default='', unique=False)
@@ -60,18 +53,9 @@ class DrDiDataset(models.Model):
         unique_together = ('name', 'version')
 
 
-class ExpressionLevel(models.Model):
-    tissue = models.ForeignKey('Tissue', on_delete=models.CASCADE)
-    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
-    expression_level = models.FloatField()
-
-    class Meta:
-        unique_together = ('tissue', 'protein')
-
-
-# class EnsemblGene(models.Model):
-#     name = models.CharField(max_length=15, unique=True)  # starts with ENSG...
-#     protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='ensg')
+class EnsemblGene(models.Model):
+    name = models.CharField(max_length=15)  # starts with ENSG...
+    protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='ensg')
 
 
 class Protein(models.Model):
@@ -84,6 +68,7 @@ class Protein(models.Model):
     entrez = models.CharField(max_length=15, default='')
     drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction',
                                    related_name='interacting_drugs')
+    ensembl = models.CharField(max_length=15, default='')
     tissue_expression = models.ManyToManyField('Tissue', through='ExpressionLevel',
                                                related_name='interacting_drugs')
 
@@ -106,6 +91,25 @@ class Protein(models.Model):
         self.entrez = other.entrez
 
 
+class ExpressionLevel(models.Model):
+    tissue = models.ForeignKey('Tissue', on_delete=models.CASCADE)
+    protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
+    expression_level = models.FloatField()
+
+    class Meta:
+        unique_together = ('tissue', 'protein')
+
+    def __hash__(self):
+        return hash(f'{self.tissue_id}_{self.protein_id}')
+
+
+class Tissue(models.Model):
+    name = models.CharField(max_length=128, default='', unique=True)
+
+    def __str__(self):
+        return self.name
+
+
 class Disorder(models.Model):
     mondo_id = models.CharField(max_length=7)
     label = models.CharField(max_length=256, default='')  # symbol
@@ -143,7 +147,7 @@ class Drug(models.Model):
         return self.drug_id
 
     def __eq__(self, other):
-        return self.drug_id == other.uniprot_code and self.name == other.name and self.status == other.status
+        return self.drug_id == other.drug_id and self.name == other.name and self.status == other.status
 
     def __ne__(self, other):
         return not self.__eq__(other)
@@ -168,6 +172,15 @@ class ProteinDisorderAssociation(models.Model):
     def __str__(self):
         return f'{self.pdis_dataset}-{self.protein}-{self.disorder}'
 
+    def __eq__(self, other):
+        return self.pdis_dataset_id == other.pdis_dataset_id and self.protein_id == other.protein_id and self.disorder_id == other.disorder_id
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash((self.pdis_dataset_id, self.protein_id, self.disorder_id))
+
 
 class DrugDisorderIndication(models.Model):
     drdi_dataset = models.ForeignKey(
@@ -181,6 +194,15 @@ class DrugDisorderIndication(models.Model):
     def __str__(self):
         return f'{self.drdi_dataset}-{self.drug}-{self.disorder}'
 
+    def __eq__(self, other):
+        return self.drdi_dataset_id == other.drdi_dataset_id and self.drug_id == other.drug_id and self.disorder_id == other.disorder_id
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash((self.drdi_dataset_id, self.drug_id, self.disorder_id))
+
 
 class ProteinProteinInteraction(models.Model):
     ppi_dataset = models.ForeignKey(
@@ -210,10 +232,19 @@ class ProteinProteinInteraction(models.Model):
     def __str__(self):
         return f'{self.ppi_dataset}-{self.from_protein}-{self.to_protein}'
 
+    def __eq__(self, other):
+        return self.ppi_dataset_id == other.ppi_dataset_id and self.from_protein_id == other.from_protein_id and self.to_protein_id == other.to_protein_id
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash((self.ppi_dataset_id, self.from_protein_id, self.to_protein_id))
+
 
 class ProteinDrugInteraction(models.Model):
     pdi_dataset = models.ForeignKey(
-        'PDIDataset', null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
+        PDIDataset, null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
     protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
     drug = models.ForeignKey('Drug', on_delete=models.CASCADE)
 
@@ -223,6 +254,15 @@ class ProteinDrugInteraction(models.Model):
     def __str__(self):
         return f'{self.pdi_dataset}-{self.protein}-{self.drug}'
 
+    def __eq__(self, other):
+        return self.pdi_dataset_id == other.pdi_dataset_id and self.protein_id == other.protein_id and self.drug_id == other.drug_id
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return hash((self.pdi_dataset_id, self.protein_id, self.drug_id))
+
 
 class Task(models.Model):
     token = models.CharField(max_length=32, unique=True)
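
The __eq__/__ne__/__hash__ additions on the interaction and association models are what make the set-based deduplication in DataPopulator work: hashing on the foreign-key id tuple means two unsaved instances describing the same edge collapse to one element before bulk_create runs. A toy sketch, not taken from the patch, with made-up ids:

```python
# Demonstrates the dedup behaviour enabled by the new __hash__/__eq__ methods.
from drugstone import models

bulk = set()
bulk.add(models.ProteinDrugInteraction(pdi_dataset_id=1, protein_id=7, drug_id=42))
bulk.add(models.ProteinDrugInteraction(pdi_dataset_id=1, protein_id=7, drug_id=42))
assert len(bulk) == 1  # the second instance is recognised as a duplicate
```
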
diff --git a/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
similarity index 63%
rename from docker-entrypoint.sh
rename to scripts/docker-entrypoint.sh
index 9686a3c..6dc4cd8 100755
--- a/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -3,13 +3,17 @@
 file="docker-entrypoint.lock"
 # exit if entrypoint.lock exists to prevent new import of data every time docker is restarted
 
-python3 manage.py makemigrations drugstone
-python3 manage.py migrate
+
 
 if ! test -f "$file"; then
+#    sh scripts/import-data.sh
+    python3 manage.py makemigrations drugstone
+    python3 manage.py migrate
     python3 manage.py createfixtures
     python3 manage.py cleanuptasks
-    sh scripts/import-data.sh
+    python3 manage.py populate_db -u --all
+    python3 manage.py make_graphs
+
     touch $file
 fi
 
diff --git a/scripts/import-data.sh b/scripts/import-data.sh
index de1c938..ea358a3 100755
--- a/scripts/import-data.sh
+++ b/scripts/import-data.sh
@@ -1,11 +1,15 @@
  #!/bin/bash
-python3 manage.py populate_db --delete_model PPI,PDI,Drug,Protein,Tissue,Disorder,PDiAssociations
+# python3 manage.py populate_db --delete_model PPI,PDI,PDi,DrDi,Drug,Protein,Tissue,Expression,Disorder
+#python3 manage.py populate_db --all -u
+#python3 manage.py populate_db -p -u
+#python3 manage.py populate_db --clear --all
+#python3 manage.py populate_db --delete_model Disorder
+#python3 manage.py populate_db -u --all
+#python3 manage.py populate_db --delete_model PDI --data_dir . -pdr
+#python3 manage.py populate_db --data_dir . -exp gene_tissue_expression.gct
 
-python3 manage.py populate_db --data_dir . -p
-python3 manage.py populate_db --data_dir . -exp gene_tissue_expression.gct
-
-python3 manage.py populate_db --data_dir . -dr
+#python3 manage.py populate_db -dr
 #python3 manage.py populate_db --data_dir . -pdr drug-protein-interaction.txt
-python3 manage.py populate_db -di
+#python3 manage.py populate_db -di
 #python3 manage.py populate_db --data_dir . -pdi "" -ddi ""
 #python3 manage.py populate_db -pp protein_protein_interaction_file.txt
\ No newline at end of file
diff --git a/tasks/betweenness_centrality_test.py b/tasks/betweenness_centrality_test.py
index 4edb602..b1a70f8 100755
--- a/tasks/betweenness_centrality_test.py
+++ b/tasks/betweenness_centrality_test.py
@@ -21,7 +21,7 @@ def betweenness_centrality_test(algorithm, parameters):
         print()
         print(results.get('node_attributes'))
             
-    task_hook = TaskHook(parameters, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook(parameters, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 class Range(object):
diff --git a/tasks/closeness_centrality_test.py b/tasks/closeness_centrality_test.py
index 9275dd4..095b0b3 100755
--- a/tasks/closeness_centrality_test.py
+++ b/tasks/closeness_centrality_test.py
@@ -20,7 +20,7 @@ def closeness_centrality_test(algorithm, parameters):
         print()
         print(results.get('node_attributes'))
             
-    task_hook = TaskHook(parameters, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook(parameters, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 class Range(object):
diff --git a/tasks/degree_centrality_test.py b/tasks/degree_centrality_test.py
index b082502..10596c5 100755
--- a/tasks/degree_centrality_test.py
+++ b/tasks/degree_centrality_test.py
@@ -20,7 +20,7 @@ def degree_centrality_test(algorithm, parameters):
         print()
         print(results.get('node_attributes'))
             
-    task_hook = TaskHook(parameters, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook(parameters, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 class Range(object):
diff --git a/tasks/keypathwayminer_test.py b/tasks/keypathwayminer_test.py
index d08fdb7..f3e1383 100755
--- a/tasks/keypathwayminer_test.py
+++ b/tasks/keypathwayminer_test.py
@@ -19,7 +19,7 @@ def task_test(algorithm):
                 print(f'   Edge #{j + 1}: {edge["from"]} -> {edge["to"]}')
             print()
 
-    task_hook = TaskHook({'k': 1, 'seeds': ['Q9BS26', 'O00124', 'P33527']}, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook({'k': 1, 'seeds': ['Q9BS26', 'O00124', 'P33527']}, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 
diff --git a/tasks/multi_steiner_test.py b/tasks/multi_steiner_test.py
index 6fd8734..dbf5c7e 100755
--- a/tasks/multi_steiner_test.py
+++ b/tasks/multi_steiner_test.py
@@ -20,7 +20,7 @@ def multi_steiner_test(algorithm, parameters):
         print()
         print(results.get('node_attributes'))
             
-    task_hook = TaskHook(parameters, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook(parameters, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 if __name__ == '__main__':
diff --git a/tasks/network_proximity_test.py b/tasks/network_proximity_test.py
index 1bbb179..9512b0a 100755
--- a/tasks/network_proximity_test.py
+++ b/tasks/network_proximity_test.py
@@ -21,7 +21,7 @@ def network_proximity_test(algorithm, parameters):
         print()
         print(results.get('node_attributes'))
 
-    task_hook = TaskHook(parameters, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook(parameters, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 
diff --git a/tasks/task_hook.py b/tasks/task_hook.py
index 9b8a669..42b860a 100755
--- a/tasks/task_hook.py
+++ b/tasks/task_hook.py
@@ -29,7 +29,7 @@ class TaskHook:
         """
         Returns the data directory including trailing slash.
 
-        :return: Data directory (e.g. '/app/data_drugstone/')
+        :return: Data directory (e.g. '/app/data/')
         """
         return self.__data_directory
 
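
With the data_drugstone directory renamed to data, every task test and the TaskHook docstring now point at the new location; task implementations are expected to build their file paths from task_hook.data_directory rather than hard-coding the old folder name. A hedged sketch of that pattern, where the file name is only illustrative:

```python
import os

def ppi_file_path(task_hook):
    # data_directory already ends with a trailing slash, e.g. '/app/data/'
    return os.path.join(task_hook.data_directory, 'PPI', 'string_interactions.csv')
```
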
diff --git a/tasks/task_test.py b/tasks/task_test.py
index 206f9e8..7cb9db5 100755
--- a/tasks/task_test.py
+++ b/tasks/task_test.py
@@ -19,7 +19,7 @@ def task_test(algorithm):
             print(f'   Edge #{j + 1}: {edge["from"]} -> {edge["to"]}')
         print()
 
-    task_hook = TaskHook({'seeds': ['Q9BS26', 'O00124', 'P33527']}, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook({'seeds': ['Q9BS26', 'O00124', 'P33527']}, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 
diff --git a/tasks/trust_rank_test.py b/tasks/trust_rank_test.py
index 62d2b91..bc9640d 100755
--- a/tasks/trust_rank_test.py
+++ b/tasks/trust_rank_test.py
@@ -20,7 +20,7 @@ def trust_rank_test(algorithm, parameters):
         print()
         print(results.get('node_attributes'))
 
-    task_hook = TaskHook(parameters, '../data_drugstone/', set_progress, set_result)
+    task_hook = TaskHook(parameters, '../data/', set_progress, set_result)
     algorithm(task_hook)
 
 
-- 
GitLab