From 639c1d18b146a7a6ac89f2e678733c50b253706c Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Mon, 4 Jul 2022 16:26:19 +0200
Subject: [PATCH] Add update and auto-update functions

Former-commit-id: 9c065a9953b12231572c9a7b73397602c2be1f55
---
 .../management/commands/import_from_nedrex.py |  56 ++---
 drugstone/management/commands/make_graphs.py  |  35 +--
 drugstone/management/commands/populate_db.py  | 213 +++++++++---------
 .../management/includes/DataPopulator.py      | 131 ++++-------
 drugstone/management/includes/NodeCache.py    |  22 +-
 drugstone/models.py                           |   3 +
 drugstone/settings/celery_schedule.py         |   2 +-
 drugstone/tasks.py                            |  16 +-
 scripts/docker-entrypoint.sh                  |   4 +-
 9 files changed, 235 insertions(+), 247 deletions(-)

diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index b3af226..7f5c889 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -107,7 +107,6 @@ class NedrexImporter:
         proteins = with_entrez
 
         iter_node_collection('gene', add_genes)
-        # TODO test updating ideas
 
         if update:
             (updates, creates) = identify_updates(proteins, self.cache.proteins)
@@ -116,6 +115,8 @@ class NedrexImporter:
             models.Protein.objects.bulk_create(creates)
             for protein in creates:
                 self.cache.proteins[protein.uniprot_code] = protein
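+                # record newly created proteins so later edge imports can filter to them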
+                self.cache.protein_updates.add(protein.uniprot_code)
+            return len(creates)
         else:
             models.Protein.objects.bulk_create(proteins.values())
             self.cache.proteins = proteins
@@ -132,14 +133,16 @@ class NedrexImporter:
 
         iter_node_collection('drug', add_drug)
 
-        # TODO test updating ideas
         if update:
             (updates, creates) = identify_updates(drugs, self.cache.drugs)
             for u in updates:
                 u.save()
+
             models.Drug.objects.bulk_create(creates)
             for drug in creates:
+                self.cache.drug_updates.add(drug.drug_id)
                 self.cache.drugs[drug.drug_id] = drug
+            return len(creates)
         else:
             models.Drug.objects.bulk_create(drugs.values())
             self.cache.drugs = drugs
@@ -157,14 +160,15 @@ class NedrexImporter:
 
         iter_node_collection('disorder', add_disorder)
 
-        # TODO test updating ideas
         if update:
             (updates, creates) = identify_updates(disorders, self.cache.disorders)
             for u in updates:
                 u.save()
             models.Disorder.objects.bulk_create(creates)
             for disorder in creates:
+                self.cache.disorder_updates.add(disorder.mondo_id)
                 self.cache.disorders[disorder.mondo_id] = disorder
+            return len(creates)
         else:
             models.Disorder.objects.bulk_create(disorders.values())
             self.cache.disorders = disorders
@@ -175,18 +179,14 @@ class NedrexImporter:
         self.cache.init_drugs()
         self.cache.init_proteins()
 
-        if update:
-            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
-
         bulk = set()
 
         def add_dpi(edge):
             try:
-                bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset,
-                                                       drug=self.cache.get_drug_by_drugbank(
-                                                           to_id(edge['sourceDomainId'])),
-                                                       protein=self.cache.get_protein_by_uniprot(
-                                                           to_id(edge['targetDomainId']))))
+                drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
+                protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
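+                # in update mode, only add edges that involve at least one newly created node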
+                if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
+                    bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
             except KeyError:
                 pass
 
@@ -197,9 +197,6 @@ class NedrexImporter:
     def import_protein_protein_interactions(self, dataset, update):
         self.cache.init_proteins()
 
-        if update:
-            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
-
         bulk = list()
 
         def iter_ppi(eval):
@@ -216,11 +213,11 @@ class NedrexImporter:
 
         def add_ppi(edge):
             try:
-                bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset,
-                                                             from_protein=self.cache.get_protein_by_uniprot(
-                                                                 to_id(edge['memberOne'])),
-                                                             to_protein=self.cache.get_protein_by_uniprot(
-                                                                 to_id(edge['memberTwo']))))
+                protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
+                protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
+                if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
+                    bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
+                                                                 to_protein=protein2))
             except KeyError:
                 pass
 
@@ -232,18 +229,15 @@ class NedrexImporter:
         self.cache.init_disorders()
         self.cache.init_proteins()
 
-        if update:
-            models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
-
         bulk = set()
 
         def add_pdis(edge):
             try:
                 disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
                 for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
-                    bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset,
-                                                               protein=protein,
-                                                               disorder=disorder, score=edge['score']))
+                    if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
+                        bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
+                                                                   disorder=disorder, score=edge['score']))
             except KeyError:
                 pass
 
@@ -255,18 +249,14 @@ class NedrexImporter:
         self.cache.init_disorders()
         self.cache.init_drugs()
 
-        if update:
-            models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
-
         bulk = set()
 
         def add_drdis(edge):
             try:
-                bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset,
-                                                       drug=self.cache.get_drug_by_drugbank(
-                                                           to_id(edge['sourceDomainId'])),
-                                                       disorder=self.cache.get_disorder_by_mondo(
-                                                           to_id(edge['targetDomainId']))))
+                drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
+                disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
+                if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
+                    bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
             except KeyError:
                 pass
 
diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py
index a6c3d81..e024555 100755
--- a/drugstone/management/commands/make_graphs.py
+++ b/drugstone/management/commands/make_graphs.py
@@ -187,19 +187,22 @@ class Command(BaseCommand):
         pass
 
     def handle(self, *args, **kwargs):
-        ppi_datasets = models.PPIDataset.objects.all()
-        ppi_datasets_names = [e.name for e in ppi_datasets]
-
-        pdi_datasets = models.PDIDataset.objects.all()
-        pdi_datasets_names = [e.name for e in pdi_datasets]
-
-        parameter_combinations = []
-        for protein_interaction_dataset in ppi_datasets_names:
-            for pdi_dataset in pdi_datasets_names:
-                parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
-
-        # close all database connections so subprocesses will create their own connections
-        # this prevents the processes from running into problems because of using the same connection
-        db.connections.close_all()
-        pool = multiprocessing.Pool(KERNEL)
-        pool.map(create_gt, parameter_combinations)
+        run()
+
+def run():
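+    # build the precomputed graph files for every PPI x PDI dataset combination using a process pool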
+    ppi_datasets = models.PPIDataset.objects.all()
+    ppi_datasets_names = [e.name for e in ppi_datasets]
+
+    pdi_datasets = models.PDIDataset.objects.all()
+    pdi_datasets_names = [e.name for e in pdi_datasets]
+
+    parameter_combinations = []
+    for protein_interaction_dataset in ppi_datasets_names:
+        for pdi_dataset in pdi_datasets_names:
+            parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
+
+    # close all database connections so subprocesses will create their own connections
+    # this prevents the processes from running into problems because of using the same connection
+    db.connections.close_all()
+    pool = multiprocessing.Pool(KERNEL)
+    pool.map(create_gt, parameter_combinations)
\ No newline at end of file
diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py
index 66d878f..d1083a2 100755
--- a/drugstone/management/commands/populate_db.py
+++ b/drugstone/management/commands/populate_db.py
@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
 from drugstone.management.includes import DatasetLoader
 
 
+
+
 class DatabasePopulator:
     def __init__(self, data_dir):
         self.data_dir = data_dir
@@ -82,107 +84,114 @@ class Command(BaseCommand):
         parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
 
     def handle(self, *args, **kwargs):
-        nedrex_api_url = "http://82.148.225.92:8123/"
-        data_dir = kwargs['data_dir']
-
-        db_populator = DatabasePopulator(data_dir=data_dir)
-
-        if kwargs['clear']:
-            db_populator.delete_all()
-
-        if kwargs['delete_model'] is not None:
-            model_list = kwargs['delete_model'].split(',')
-            db_populator.delete_models(model_list)
-
-        cache = NodeCache()
-        update = True if kwargs['update'] else False
-        importer = NedrexImporter(nedrex_api_url, cache)
-        populator = DataPopulator(cache)
-
-        if kwargs['all']:
-            kwargs['drugs'] = True
-            kwargs['disorders'] = True
-            kwargs['proteins'] = True
-            kwargs['exp'] = True
-            kwargs['protein_protein'] = True
-            kwargs['protein_drug'] = True
-            kwargs['protein_disorder'] = True
-            kwargs['drug_disorder'] = True
-
-        if kwargs['drugs']:
-            print('Populating Drugs...')
-            n = NedrexImporter.import_drugs(importer, update)
-            print(f'Populated {n} Drugs.')
-
-        if kwargs['disorders']:
-            print('Populating Disorders...')
-            n = NedrexImporter.import_disorders(importer, update)
-            print(f'Populated {n} Disorders.')
-
-        if kwargs['proteins']:
-            print('Populating Proteins...')
-            n = NedrexImporter.import_proteins(importer, update)
-            print(f'Populated {n} Proteins.')
-            print('Populating ENSG IDs...')
-            n = DataPopulator.populate_ensg(populator,update)
-            print(f'Populated {n} ENSG IDs.')
-
-        if kwargs['exp']:
-            print('Populating Expressions...')
-            n = DataPopulator.populate_expressions(populator, update)
-            print(f'Populated {n} Expressions.')
-
-        if kwargs['protein_protein']:
-            print('Importing PPIs from NeDRexDB...')
-            n = NedrexImporter.import_protein_protein_interactions(importer,
+        populate(kwargs)
+
+def populate(kwargs):
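+    # expects the same keys as the command-line options declared in add_arguments(); callers must supply them all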
+
+    nedrex_api_url = "http://82.148.225.92:8123/"
+    data_dir = kwargs['data_dir']
+
+    db_populator = DatabasePopulator(data_dir=data_dir)
+
+    if kwargs['clear']:
+        db_populator.delete_all()
+
+    if kwargs['delete_model'] is not None:
+        model_list = kwargs['delete_model'].split(',')
+        db_populator.delete_models(model_list)
+
+    cache = NodeCache()
+    update = bool(kwargs['update'])
+    importer = NedrexImporter(nedrex_api_url, cache)
+    populator = DataPopulator(cache)
+
+    if kwargs['all']:
+        kwargs['drugs'] = True
+        kwargs['disorders'] = True
+        kwargs['proteins'] = True
+        kwargs['exp'] = True
+        kwargs['protein_protein'] = True
+        kwargs['protein_drug'] = True
+        kwargs['protein_disorder'] = True
+        kwargs['drug_disorder'] = True
+
+    if kwargs['drugs']:
+        print('Populating Drugs...')
+        n = NedrexImporter.import_drugs(importer, update)
+        print(f'Populated {n} Drugs.')
+
+    if kwargs['disorders']:
+        print('Populating Disorders...')
+        n = NedrexImporter.import_disorders(importer, update)
+        print(f'Populated {n} Disorders.')
+
+    if kwargs['proteins']:
+        print('Populating Proteins...')
+        n = NedrexImporter.import_proteins(importer, update)
+        print(f'Populated {n} Proteins.')
+        print('Populating ENSG IDs...')
+        n = DataPopulator.populate_ensg(populator, update)
+        print(f'Populated {n} ENSG IDs.')
+
+    if kwargs['exp']:
+        print('Populating Expressions...')
+        n = DataPopulator.populate_expressions(populator, update)
+        print(f'Populated {n} Expressions.')
+
+    if kwargs['protein_protein']:
+        print('Importing PPIs from NeDRexDB...')
+        n = NedrexImporter.import_protein_protein_interactions(importer,
                                                                DatasetLoader.get_ppi_nedrex(nedrex_api_url),
                                                                update)
-            print(f'Imported {n} PPIs from NeDRexDB')
-            print('Populating PPIs from STRING...')
-            n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
-            print(f'Populated {n} PPIs from STRING.')
-
-            print('Populating PPIs from APID...')
-            n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
-            print(f'Populated {n} PPIs from APID.')
-
-            print('Populating PPIs from BioGRID...')
-            n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
-            print(f'Populated {n} PPIs from BioGRID.')
-
-        if kwargs['protein_drug']:
-            print('Importing PDIs from NeDRexDB...')
-            n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
-            print(f'Imported {n} PDIs from NeDRexDB')
-
-            print('Populating PDIs from Chembl...')
-            n = DataPopulator.populate_pdi_chembl(populator,DatasetLoader.get_drug_target_chembl(), update)
-            print(f'Populated {n} PDIs from Chembl.')
-
-            print('Populating PDIs from DGIdb...')
-            n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
-            print(f'Populated {n} PDIs from DGIdb.')
-
-            print('Populating PDIs from DrugBank...')
-            n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
-            print(f'Populated {n} PDIs from DrugBank.')
-
-        if kwargs['protein_disorder']:
-            print('Importing PDis from NeDRexDB...')
-            n = NedrexImporter.import_protein_disorder_associations(importer,
-                                                               DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
-                                                               update)
-            print(f'Imported {n} PDis from NeDRexDB')
-            print('Populating PDis associations from DisGeNET...')
-            n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
-            print(f'Populated {n} PDis associations from DisGeNET.')
-
-        if kwargs['drug_disorder']:
-            print('Importing DrDis from NeDRexDB...')
-            n = NedrexImporter.import_drug_disorder_indications(importer,
-                                                               DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
-                                                               update)
-            print(f'Imported {n} DrDis from NeDRexDB')
-            print('Populating DrDi indications from DrugBank...')
-            n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
-            print(f'Populated {n} DrDi associations from DrugBank.')
+        print(f'Imported {n} PPIs from NeDRexDB')
+        print('Populating PPIs from STRING...')
+        n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
+        print(f'Populated {n} PPIs from STRING.')
+
+        print('Populating PPIs from APID...')
+        n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
+        print(f'Populated {n} PPIs from APID.')
+
+        print('Populating PPIs from BioGRID...')
+        n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
+        print(f'Populated {n} PPIs from BioGRID.')
+
+    if kwargs['protein_drug']:
+        print('Importing PDIs from NeDRexDB...')
+        n = NedrexImporter.import_drug_target_interactions(importer,
+                                                           DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
+                                                           update)
+        print(f'Imported {n} PDIs from NeDRexDB')
+
+        print('Populating PDIs from Chembl...')
+        n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
+        print(f'Populated {n} PDIs from Chembl.')
+
+        print('Populating PDIs from DGIdb...')
+        n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
+        print(f'Populated {n} PDIs from DGIdb.')
+
+        print('Populating PDIs from DrugBank...')
+        n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
+        print(f'Populated {n} PDIs from DrugBank.')
+
+    if kwargs['protein_disorder']:
+        print('Importing PDis from NeDRexDB...')
+        n = NedrexImporter.import_protein_disorder_associations(importer,
+                                                                DatasetLoader.get_protein_disorder_nedrex(
+                                                                    nedrex_api_url),
+                                                                update)
+        print(f'Imported {n} PDis from NeDRexDB')
+        print('Populating PDis associations from DisGeNET...')
+        n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
+        print(f'Populated {n} PDis associations from DisGeNET.')
+
+    if kwargs['drug_disorder']:
+        print('Importing DrDis from NeDRexDB...')
+        n = NedrexImporter.import_drug_disorder_indications(importer,
+                                                            DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
+                                                            update)
+        print(f'Imported {n} DrDis from NeDRexDB')
+        print('Populating DrDi indications from DrugBank...')
+        n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
+        print(f'Populated {n} DrDi associations from DrugBank.')
diff --git a/drugstone/management/includes/DataPopulator.py b/drugstone/management/includes/DataPopulator.py
index 5858202..b31055f 100755
--- a/drugstone/management/includes/DataPopulator.py
+++ b/drugstone/management/includes/DataPopulator.py
@@ -9,19 +9,13 @@ class DataPopulator:
         self.cache = cache
 
     def populate_expressions(self, update):
-        if update:
-            models.ExpressionLevel.objects.all().delete()
 
         self.cache.init_proteins()
         df = DataLoader.load_expressions()
 
         tissues_models = dict()
         for tissue_name in df.columns.values[2:]:
-            try:
-                tissue_model = models.Tissue.objects.get(name=tissue_name)
-            except models.Tissue.DoesNotExist:
-                tissue_model = models.Tissue.objects.create(name=tissue_name)
-            tissues_models[tissue_name] = tissue_model
+            tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)[0]  # get_or_create returns (obj, created)
 
         proteins_linked = 0
         bulk = set()
@@ -33,16 +27,16 @@ class DataPopulator:
 
             for protein_model in self.cache.get_proteins_by_gene(gene_name):
                 proteins_linked += 1
-
-                for tissue_name, tissue_model in tissues_models.items():
-                    expr = models.ExpressionLevel(protein=protein_model,
-                                                  tissue=tissue_model,
-                                                  expression_level=row[tissue_name])
-                    id = hash(expr)
-                    if id in uniq:
-                        continue
-                    uniq.add(id)
-                    bulk.add(expr)
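+                # when updating, only build expression rows for newly added proteins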
+                if not update or self.cache.is_new_protein(protein_model):
+                    for tissue_name, tissue_model in tissues_models.items():
+                        expr = models.ExpressionLevel(protein=protein_model,
+                                                      tissue=tissue_model,
+                                                      expression_level=row[tissue_name])
+                        id = hash(expr)
+                        if id in uniq:
+                            continue
+                        uniq.add(id)
+                        bulk.add(expr)
             if len(bulk) > 100000:
                 models.ExpressionLevel.objects.bulk_create(bulk)
                 size += len(bulk)
@@ -59,8 +53,6 @@ class DataPopulator:
         Returns:
             int: Count of how many ensg-protein relations were added
         """
-        if update:
-            models.EnsemblGene.objects.all().delete()
         self.cache.init_proteins()
         data = DataLoader.load_ensg()
         bulk = list()
@@ -69,7 +61,8 @@ class DataPopulator:
             proteins = self.cache.get_proteins_by_entrez(entrez)
             for protein in proteins:
                 for ensg in ensg_list:
-                    bulk.append(models.EnsemblGene(name=ensg, protein=protein))
+                    if not update or self.cache.is_new_protein(protein):
+                        bulk.append(models.EnsemblGene(name=ensg, protein=protein))
         models.EnsemblGene.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -81,8 +74,6 @@ class DataPopulator:
             int: Count of how many interactions were added
         """
         self.cache.init_proteins()
-        if update:
-            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
 
         df = DataLoader.load_ppi_string()
         bulk = list()
@@ -92,19 +83,15 @@ class DataPopulator:
                 proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
                 proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
             except KeyError:
-                # continue if not found
                 continue
             for protein_a in proteins_a:
                 for protein_b in proteins_b:
-                    try:
+                    if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
                         bulk.append(models.ProteinProteinInteraction(
                             ppi_dataset=dataset,
                             from_protein=protein_a,
                             to_protein=protein_b
                         ))
-                    except models.ValidationError:
-                        # duplicate
-                        continue
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -117,8 +104,6 @@ class DataPopulator:
         """
         self.cache.init_proteins()
 
-        if update:
-            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
         df = DataLoader.load_ppi_apid()
         bulk = set()
         for _, row in df.iterrows():
@@ -129,14 +114,12 @@ class DataPopulator:
             except KeyError:
                 # continue if not found
                 continue
-            try:
+            if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
                 bulk.add(models.ProteinProteinInteraction(
                     ppi_dataset=dataset,
                     from_protein=protein_a,
                     to_protein=protein_b
                 ))
-            except models.ValidationError:
-                continue
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -149,8 +132,6 @@ class DataPopulator:
         """
         self.cache.init_proteins()
 
-        if update:
-            models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
         df = DataLoader.load_ppi_biogrid()
         bulk = list()
         for _, row in df.iterrows():
@@ -164,15 +145,12 @@ class DataPopulator:
                 continue
             for protein_a in proteins_a:
                 for protein_b in proteins_b:
-                    try:
+                    if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
                         bulk.append(models.ProteinProteinInteraction(
                             ppi_dataset=dataset,
                             from_protein=protein_a,
                             to_protein=protein_b
                         ))
-                    except models.ValidationError:
-                        # duplicate
-                        continue
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -186,8 +164,6 @@ class DataPopulator:
         self.cache.init_proteins()
         self.cache.init_drugs()
 
-        if update:
-            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
         df = DataLoader.load_pdi_chembl()
         bulk = set()
         for _, row in df.iterrows():
@@ -202,11 +178,12 @@ class DataPopulator:
             except KeyError:
                 # continue if not found
                 continue
-            bulk.add(models.ProteinDrugInteraction(
-                pdi_dataset=dataset,
-                protein=protein,
-                drug=drug
-            ))
+            if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
+                bulk.add(models.ProteinDrugInteraction(
+                    pdi_dataset=dataset,
+                    protein=protein,
+                    drug=drug
+                ))
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -220,9 +197,6 @@ class DataPopulator:
         self.cache.init_proteins()
         self.cache.init_disorders()
 
-
-        if update:
-            models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
         df = DataLoader.load_pdis_disgenet()
         bulk = set()
         for _, row in df.iterrows():
@@ -238,12 +212,13 @@ class DataPopulator:
             except KeyError:
                 # continue if not found
                 continue
-            bulk.add(models.ProteinDisorderAssociation(
-                pdis_dataset=dataset,
-                protein=protein,
-                disorder=disorder,
-                score=row['score']
-            ))
+            if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
+                bulk.add(models.ProteinDisorderAssociation(
+                    pdis_dataset=dataset,
+                    protein=protein,
+                    disorder=disorder,
+                    score=row['score']
+                ))
         models.ProteinDisorderAssociation.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -256,8 +231,6 @@ class DataPopulator:
         """
         self.cache.init_drugs()
         self.cache.init_disorders()
-        if update:
-            models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
 
         df = DataLoader.load_drdis_drugbank()
         bulk = set()
@@ -274,11 +247,12 @@ class DataPopulator:
             except KeyError:
                 # continue if not found
                 continue
-            bulk.add(models.DrugDisorderIndication(
-                drdi_dataset=dataset,
-                drug=drug,
-                disorder=disorder,
-            ))
+            if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
+                bulk.add(models.DrugDisorderIndication(
+                    drdi_dataset=dataset,
+                    drug=drug,
+                    disorder=disorder,
+                ))
         models.DrugDisorderIndication.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -292,29 +266,24 @@ class DataPopulator:
         self.cache.init_proteins()
         self.cache.init_drugs()
 
-        if update:
-            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
         df = DataLoader.load_pdi_dgidb()
         bulk = set()
         for _, row in df.iterrows():
             try:
-                # try fetching protein
                 proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
             except KeyError:
-                # continue if not found
                 continue
             try:
-                # try fetching drug
                 drug = self.cache.get_drug_by_drugbank(row['drug_id'])
             except KeyError:
-                # continue if not found
                 continue
             for protein in proteins:
-                bulk.add(models.ProteinDrugInteraction(
-                    pdi_dataset=dataset,
-                    protein=protein,
-                    drug=drug
-                ))
+                if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
+                    bulk.add(models.ProteinDrugInteraction(
+                        pdi_dataset=dataset,
+                        protein=protein,
+                        drug=drug
+                    ))
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
         return len(bulk)
 
@@ -328,29 +297,23 @@ class DataPopulator:
         self.cache.init_proteins()
         self.cache.init_drugs()
 
-
-        if update:
-            models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
         df = DataLoader.load_pdi_drugbank()
         bulk = set()
         for _, row in df.iterrows():
             try:
-                # try fetching protein
                 proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
             except KeyError:
-                # continue if not found
                 continue
             try:
-                # try fetching drug
                 drug = self.cache.get_drug_by_drugbank(row['drug_id'])
             except KeyError:
-                # continue if not found
                 continue
             for protein in proteins:
-                bulk.add(models.ProteinDrugInteraction(
-                    pdi_dataset=dataset,
-                    protein=protein,
-                    drug=drug
-                ))
+                if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
+                    bulk.add(models.ProteinDrugInteraction(
+                        pdi_dataset=dataset,
+                        protein=protein,
+                        drug=drug
+                    ))
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
         return len(bulk)
diff --git a/drugstone/management/includes/NodeCache.py b/drugstone/management/includes/NodeCache.py
index 7f9491c..5df92d8 100644
--- a/drugstone/management/includes/NodeCache.py
+++ b/drugstone/management/includes/NodeCache.py
@@ -10,6 +10,10 @@ class NodeCache:
     disorders = dict()
     drugs = dict()
 
+    drug_updates = set()
+    disorder_updates = set()
+    protein_updates = set()
+
     def init_protein_maps(self):
         print("Generating protein id maps...")
         for protein in self.proteins.values():
@@ -20,23 +24,39 @@ class NodeCache:
         if len(self.proteins) == 0:
             print("Generating protein maps...")
             for protein in models.Protein.objects.all():
+                if protein.id < 1000:
+                    protein.delete()
+                    continue
                 self.proteins[protein.uniprot_code] = protein
         if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
             self.init_protein_maps()
 
-
     def init_drugs(self):
         if len(self.drugs) == 0:
             print("Generating drug map...")
             for drug in models.Drug.objects.all():
+                if drug.id < 1000:
+                    drug.delete()
+                    continue
                 self.drugs[drug.drug_id] = drug
 
     def init_disorders(self):
         if len(self.disorders) == 0:
             print("Generating disorder map...")
             for disorder in models.Disorder.objects.all():
+                if disorder.id < 1000:
+                    disorder.delete()
+                    continue
                 self.disorders[disorder.mondo_id] = disorder
 
+    # membership in the *_updates sets marks nodes that were newly created during the current update run
+    def is_new_protein(self, protein: models.Protein):
+        return protein.uniprot_code in self.protein_updates
+
+    def is_new_drug(self, drug: models.Drug):
+        return drug.drug_id in self.drug_updates
+
+    def is_new_disease(self, disease: models.Disorder):
+        return disease.mondo_id in self.disorder_updates
 
     def get_protein_by_uniprot(self,uniprot_id):
         return self.proteins[uniprot_id]
diff --git a/drugstone/models.py b/drugstone/models.py
index 5391a43..b2c7227 100755
--- a/drugstone/models.py
+++ b/drugstone/models.py
@@ -84,6 +84,9 @@ class Protein(models.Model):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    def __hash__(self):
+        # defining __eq__ suppresses the inherited __hash__, so define it explicitly to keep Protein hashable
+        return hash((self.uniprot_code, self.gene, self.entrez))
+
     def update(self, other):
         self.uniprot_code = other.uniprot_code
         self.gene = other.gene
diff --git a/drugstone/settings/celery_schedule.py b/drugstone/settings/celery_schedule.py
index de5a78a..b066327 100644
--- a/drugstone/settings/celery_schedule.py
+++ b/drugstone/settings/celery_schedule.py
@@ -3,6 +3,6 @@ from celery.schedules import crontab
 CELERY_BEAT_SCHEDULE = {
     'update_db': {
         'task': 'drugstone.tasks.task_update_db_from_nedrex',
-        'schedule': crontab(minute='*/1'),
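+        # weekly refresh: every Monday at 05:00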
+        'schedule': crontab(day_of_week=1, hour=5, minute=0),
     },
 }
diff --git a/drugstone/tasks.py b/drugstone/tasks.py
index 7451162..03449fc 100644
--- a/drugstone/tasks.py
+++ b/drugstone/tasks.py
@@ -1,18 +1,18 @@
 from celery import shared_task
 from celery.utils.log import get_task_logger
-from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data
+from drugstone.management.commands.populate_db import populate
+from drugstone.management.commands.make_graphs import run as make_graphs
 
 logger = get_task_logger(__name__)
 
+nedrex_api_url = "http://82.148.225.92:8123/"
+
 
 @shared_task
 def task_update_db_from_nedrex():
     logger.info('Updating DB from NeDRex.')
-    print('here')
-
-    logger.info('Fetching data...')
-    # fetch_nedrex_data()
-
-    logger.info('Integrating data...')
-    # integrate_nedrex_data()
+    logger.info('Updating data...')
+    # populate() reads these keys unconditionally, so pass explicit defaults for the CLI-only options
+    populate({"all": True, "update": True, "data_dir": None, "clear": False, "delete_model": None})
+    logger.info('Recreating networks...')
+    make_graphs()
     logger.info('Done.')
diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
index a525f29..142a9f5 100755
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
 
 
 
-if ! test -f "$file"; then
+#if ! test -f "$file"; then
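+# guard disabled: the setup steps below now run on every container start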
 #    sh scripts/import-data.sh
     python3 manage.py makemigrations drugstone
     python3 manage.py migrate
@@ -14,6 +14,6 @@ if ! test -f "$file"; then
     python3 manage.py populate_db -u --all
     python3 manage.py make_graphs
     touch $file
-fi
+#fi
 
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
-- 
GitLab