Skip to content
Snippets Groups Projects
Commit 71505df5 authored by AndiMajore's avatar AndiMajore
Browse files

added update and autoupdate functions

Former-commit-id: 1452720df605af5280396874a2c4f41a06b291ef [formerly c5ffaf4e6fd037c3fd9e0a0f6d71096e6037d754]
Former-commit-id: 0186ba3f7cd4e20538fb0625ac272cae9844a75d
parent 027a9b28
No related branches found
No related tags found
No related merge requests found
......@@ -107,7 +107,6 @@ class NedrexImporter:
proteins = with_entrez
iter_node_collection('gene', add_genes)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
......@@ -116,6 +115,8 @@ class NedrexImporter:
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
self.cache.proteins = proteins
......@@ -132,14 +133,16 @@ class NedrexImporter:
iter_node_collection('drug', add_drug)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(drugs, self.cache.drugs)
for u in updates:
u.save()
models.Drug.objects.bulk_create(creates)
for drug in creates:
self.cache.drug_updates.add(drug.drug_id)
self.cache.drugs[drug.drug_id] = drug
return len(creates)
else:
models.Drug.objects.bulk_create(drugs.values())
self.cache.drugs = drugs
......@@ -157,14 +160,15 @@ class NedrexImporter:
iter_node_collection('disorder', add_disorder)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(disorders, self.cache.disorders)
for u in updates:
u.save()
models.Disorder.objects.bulk_create(creates)
for disorder in creates:
self.cache.disorder_updates.add(disorder.mondo_id)
self.cache.disorders[disorder.mondo_id] = disorder
return len(creates)
else:
models.Disorder.objects.bulk_create(disorders.values())
self.cache.disorders = disorders
......@@ -175,18 +179,14 @@ class NedrexImporter:
self.cache.init_drugs()
self.cache.init_proteins()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
bulk = set()
def add_dpi(edge):
try:
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset,
drug=self.cache.get_drug_by_drugbank(
to_id(edge['sourceDomainId'])),
protein=self.cache.get_protein_by_uniprot(
to_id(edge['targetDomainId']))))
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
except KeyError:
pass
......@@ -197,9 +197,6 @@ class NedrexImporter:
def import_protein_protein_interactions(self, dataset, update):
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
bulk = list()
def iter_ppi(eval):
......@@ -216,11 +213,11 @@ class NedrexImporter:
def add_ppi(edge):
try:
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset,
from_protein=self.cache.get_protein_by_uniprot(
to_id(edge['memberOne'])),
to_protein=self.cache.get_protein_by_uniprot(
to_id(edge['memberTwo']))))
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_protein=protein2))
except KeyError:
pass
......@@ -232,18 +229,15 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_proteins()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
bulk = set()
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset,
protein=protein,
disorder=disorder, score=edge['score']))
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score']))
except KeyError:
pass
......@@ -255,18 +249,14 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_drugs()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
bulk = set()
def add_drdis(edge):
try:
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset,
drug=self.cache.get_drug_by_drugbank(
to_id(edge['sourceDomainId'])),
disorder=self.cache.get_disorder_by_mondo(
to_id(edge['targetDomainId']))))
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
except KeyError:
pass
......
......@@ -187,19 +187,22 @@ class Command(BaseCommand):
pass
def handle(self, *args, **kwargs):
ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets]
pdi_datasets = models.PDIDataset.objects.all()
pdi_datasets_names = [e.name for e in pdi_datasets]
parameter_combinations = []
for protein_interaction_dataset in ppi_datasets_names:
for pdi_dataset in pdi_datasets_names:
parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
# close all database connections so subprocesses will create their own connections
# this prevents the processes from running into problems because of using the same connection
db.connections.close_all()
pool = multiprocessing.Pool(KERNEL)
pool.map(create_gt, parameter_combinations)
run()
def run():
    """Build ground-truth graphs for every PPI/PDI dataset combination.

    Collects the names of all PPI and PDI datasets, forms the cartesian
    product of the two name lists, and processes each (ppi, pdi) pair with
    ``create_gt`` in a multiprocessing pool of ``KERNEL`` workers.
    """
    ppi_dataset_names = [dataset.name for dataset in models.PPIDataset.objects.all()]
    pdi_dataset_names = [dataset.name for dataset in models.PDIDataset.objects.all()]
    parameter_combinations = [(ppi_name, pdi_name)
                              for ppi_name in ppi_dataset_names
                              for pdi_name in pdi_dataset_names]
    # Close all database connections so each subprocess opens its own;
    # sharing one connection across processes causes errors.
    db.connections.close_all()
    # Context manager guarantees the pool's workers are torn down even if
    # create_gt raises — the original never closed/joined the pool.
    with multiprocessing.Pool(KERNEL) as pool:
        pool.map(create_gt, parameter_combinations)
\ No newline at end of file
......@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -82,107 +84,114 @@ class Command(BaseCommand):
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
def handle(self, *args, **kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
kwargs['exp'] = True
kwargs['protein_protein'] = True
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator,update)
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
print('Importing PPIs from NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
populate(kwargs)
def populate(kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
kwargs['exp'] = True
kwargs['protein_protein'] = True
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update)
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
print('Importing PPIs from NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator,DatasetLoader.get_drug_target_chembl(), update)
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
print(f'Populated {n} DrDi associations from DrugBank.')
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
print(f'Populated {n} DrDi associations from DrugBank.')
......@@ -9,19 +9,13 @@ class DataPopulator:
self.cache = cache
def populate_expressions(self, update):
if update:
models.ExpressionLevel.objects.all().delete()
self.cache.init_proteins()
df = DataLoader.load_expressions()
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
try:
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
proteins_linked = 0
bulk = set()
......@@ -33,16 +27,16 @@ class DataPopulator:
for protein_model in self.cache.get_proteins_by_gene(gene_name):
proteins_linked += 1
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name])
id = hash(expr)
if id in uniq:
continue
uniq.add(id)
bulk.add(expr)
if not update or self.cache.is_new_protein(protein_model):
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name])
id = hash(expr)
if id in uniq:
continue
uniq.add(id)
bulk.add(expr)
if len(bulk) > 100000:
models.ExpressionLevel.objects.bulk_create(bulk)
size += len(bulk)
......@@ -59,8 +53,6 @@ class DataPopulator:
Returns:
int: Count of how many ensg-protein relations were added
"""
if update:
models.EnsemblGene.objects.all().delete()
self.cache.init_proteins()
data = DataLoader.load_ensg()
bulk = list()
......@@ -69,7 +61,8 @@ class DataPopulator:
proteins = self.cache.get_proteins_by_entrez(entrez)
for protein in proteins:
for ensg in ensg_list:
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
if not update or self.cache.is_new_protein(protein):
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk)
......@@ -81,8 +74,6 @@ class DataPopulator:
int: Count of how many interactions were added
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_string()
bulk = list()
......@@ -92,19 +83,15 @@ class DataPopulator:
proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
except KeyError:
# continue if not found
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -117,8 +104,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_apid()
bulk = set()
for _, row in df.iterrows():
......@@ -129,14 +114,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.add(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -149,8 +132,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_biogrid()
bulk = list()
for _, row in df.iterrows():
......@@ -164,15 +145,12 @@ class DataPopulator:
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -186,8 +164,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_chembl()
bulk = set()
for _, row in df.iterrows():
......@@ -202,11 +178,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -220,9 +197,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_disorders()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
df = DataLoader.load_pdis_disgenet()
bulk = set()
for _, row in df.iterrows():
......@@ -238,12 +212,13 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
disorder=disorder,
score=row['score']
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
disorder=disorder,
score=row['score']
))
models.ProteinDisorderAssociation.objects.bulk_create(bulk)
return len(bulk)
......@@ -256,8 +231,6 @@ class DataPopulator:
"""
self.cache.init_drugs()
self.cache.init_disorders()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
df = DataLoader.load_drdis_drugbank()
bulk = set()
......@@ -274,11 +247,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
disorder=disorder,
))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
disorder=disorder,
))
models.DrugDisorderIndication.objects.bulk_create(bulk)
return len(bulk)
......@@ -292,29 +266,24 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_dgidb()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -328,29 +297,23 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_drugbank()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -10,6 +10,10 @@ class NodeCache:
disorders = dict()
drugs = dict()
drug_updates = set()
disorder_updates = set()
protein_updates = set()
def init_protein_maps(self):
print("Generating protein id maps...")
for protein in self.proteins.values():
......@@ -20,23 +24,39 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
def init_drugs(self):
    """Lazily build the drug_id -> Drug map from the database.

    No-op when the map is already populated. Rows with a primary key
    below 1000 are deleted instead of cached (presumably legacy/seed
    rows — TODO confirm the intent of this threshold).
    """
    if self.drugs:
        return
    print("Generating drug map...")
    for drug in models.Drug.objects.all():
        if drug.id >= 1000:
            self.drugs[drug.drug_id] = drug
        else:
            drug.delete()
def init_disorders(self):
    """Lazily build the mondo_id -> Disorder map from the database.

    Does nothing when the map was built already. Rows with a primary key
    below 1000 are deleted instead of cached (presumably legacy/seed
    rows — TODO confirm the intent of this threshold).
    """
    if self.disorders:
        return
    print("Generating disorder map...")
    for disorder in models.Disorder.objects.all():
        if disorder.id >= 1000:
            self.disorders[disorder.mondo_id] = disorder
        else:
            disorder.delete()
def is_new_protein(self, protein:models.Protein):
    """Return True if *protein* was created during the current update run
    (its uniprot_code was recorded in ``protein_updates``)."""
    return protein.uniprot_code in self.protein_updates
def is_new_drug(self, drug:models.Drug):
    """Return True if *drug* was created during the current update run
    (its drug_id was recorded in ``drug_updates``)."""
    return drug.drug_id in self.drug_updates
def is_new_disease(self, disease:models.Disorder):
    """Return True if *disease* was created during the current update run
    (its mondo_id was recorded in ``disorder_updates``)."""
    return disease.mondo_id in self.disorder_updates
def get_protein_by_uniprot(self, uniprot_id):
    """Look up a cached Protein by its UniProt accession.

    Raises:
        KeyError: if the accession is not in the cache.
    """
    protein_map = self.proteins
    return protein_map[uniprot_id]
......
......@@ -84,6 +84,9 @@ class Protein(models.Model):
def __ne__(self, other):
    # Inequality defined as the negation of __eq__ so the two stay consistent.
    return not self.__eq__(other)
def __hash__(self):
    # Hash on the identifying fields so objects that compare equal hash
    # equally (required for correct behaviour in sets/dicts).
    return hash((self.uniprot_code, self.gene, self.entrez))
def update(self, other):
self.uniprot_code = other.uniprot_code
self.gene = other.gene
......
......@@ -3,6 +3,6 @@ from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = {
'update_db': {
'task': 'drugstone.tasks.task_update_db_from_nedrex',
'schedule': crontab(minute='*/1'),
'schedule': crontab(day_of_week=1, hour=5, minute=0),
},
}
from celery import shared_task
from celery.utils.log import get_task_logger
from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data
from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
@shared_task
def task_update_db_from_nedrex():
    """Scheduled Celery task: refresh the database from NeDRex and rebuild graphs.

    Runs a full populate in update mode (all entity and edge types), then
    recreates the precomputed network files.
    """
    logger.info('Updating DB from NeDRex.')
    logger.info('Fetching data...')
    # fetch_nedrex_data()
    logger.info('Integrating data...')
    # integrate_nedrex_data()
    logger.info('Updating data...')
    # populate() reads these keys unconditionally; pass explicit defaults so
    # the scheduled run does not die with a KeyError.
    populate({
        "all": True,
        "update": True,
        "data_dir": None,
        "clear": False,
        "delete_model": None,
    })
    logger.info('Recreating networks...')
    make_graphs()
    logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then
#if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
fi
#fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment