From cd7789a58b51a020684277906283cc6f1b4e84e8 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Tue, 5 Jul 2022 17:05:57 +0200
Subject: [PATCH] fixed make_graphs; added individual source datasets from
 nedrex

---
 .../management/commands/import_from_nedrex.py |  28 ++++-
 drugstone/management/commands/make_graphs.py  |  35 +++---
 drugstone/management/commands/populate_db.py  |  24 ++--
 .../management/includes/DatasetLoader.py      | 103 ++++++++++++++++++
 drugstone/tasks.py                            |  10 +-
 scripts/docker-entrypoint.sh                  |  10 +-
 scripts/start_celery_worker.sh                |   1 +
 7 files changed, 171 insertions(+), 40 deletions(-)

diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index 6f11679..7043e31 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -5,6 +5,7 @@ from python_nedrex.core import get_nodes, get_edges, get_api_key
 
 from drugstone import models
 from drugstone.management.includes.NodeCache import NodeCache
+from drugstone.management.includes import DatasetLoader
 
 
 def iter_node_collection(coll_name, eval):
@@ -57,10 +58,12 @@ def to_id(string):
 
 class NedrexImporter:
     cache: NodeCache = None
+    url: str = ''
 
     def __init__(self, base_url, cache: NodeCache):
         self.cache = cache
         nedrex.config.set_url_base(base_url)
+        self.url = base_url
         api_key = get_api_key(accept_eula=True)
         nedrex.config.set_api_key(api_key)
 
@@ -187,6 +190,8 @@ class NedrexImporter:
             for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
                 existing.add(edge.__hash__())
 
+        source_datasets = DatasetLoader.get_pdr_nedrex_datasets(self.url)
+
         def add_dpi(edge):
             try:
                 drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
@@ -194,6 +199,10 @@ class NedrexImporter:
                 e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
+                    for source in edge['assertedBy']:
+                        bulk.add(
+                            models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein))
+
             except KeyError:
                 pass
 
 
@@ -210,6 +219,8 @@ class NedrexImporter:
             for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
                 existing.add(edge.__hash__())
 
+        source_datasets = DatasetLoader.get_ppi_nedrex_datasets(self.url)
+
         def iter_ppi(eval):
             from python_nedrex import ppi
             offset = 0
@@ -226,9 +237,13 @@ class NedrexImporter:
             try:
                 protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
                 protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
-                e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2)
+                e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2)
                 if not update or e.__hash__() not in existing:
                     bulk.append(e)
+                    for source in edge['assertedBy']:
+                        bulk.append(
+                            models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1,
+                                                             to_protein=protein2))
             except KeyError:
                 pass
 
@@ -246,6 +261,8 @@ class NedrexImporter:
             for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
                 existing.add(edge.__hash__())
 
+        source_datasets = DatasetLoader.get_dis_prot_nedrex_datasets(self.url)
+
         def add_pdis(edge):
             try:
                 disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
@@ -254,6 +271,10 @@ class NedrexImporter:
                                                           score=edge['score'])
                     if not update or e.__hash__() not in existing:
                         bulk.add(e)
+                        for source in edge['assertedBy']:
+                            bulk.add(
+                                models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder,
+                                                          score=edge['score']))
             except KeyError:
                 pass
 
@@ -271,6 +292,8 @@ class NedrexImporter:
             for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
                 existing.add(edge.__hash__())
 
+        source_datasets = DatasetLoader.get_drdis_nedrex_datasets(self.url)
+
         def add_drdis(edge):
             try:
                 drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
@@ -278,6 +301,9 @@ class NedrexImporter:
                 e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
+                    for source in edge['assertedBy']:
+                        bulk.add(
+                            models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder))
             except KeyError:
                 pass
 
diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py
index e024555..a6c3d81 100755
--- a/drugstone/management/commands/make_graphs.py
+++ b/drugstone/management/commands/make_graphs.py
@@ -187,22 +187,19 @@ class Command(BaseCommand):
         pass
 
     def handle(self, *args, **kwargs):
-        run()
-
-def run():
-    ppi_datasets = models.PPIDataset.objects.all()
-    ppi_datasets_names = [e.name for e in ppi_datasets]
-
-    pdi_datasets = models.PDIDataset.objects.all()
-    pdi_datasets_names = [e.name for e in pdi_datasets]
-
-    parameter_combinations = []
-    for protein_interaction_dataset in ppi_datasets_names:
-        for pdi_dataset in pdi_datasets_names:
-            parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
-
-    # close all database connections so subprocesses will create their own connections
-    # this prevents the processes from running into problems because of using the same connection
-    db.connections.close_all()
-    pool = multiprocessing.Pool(KERNEL)
-    pool.map(create_gt, parameter_combinations)
\ No newline at end of file
+        ppi_datasets = models.PPIDataset.objects.all()
+        ppi_datasets_names = [e.name for e in ppi_datasets]
+
+        pdi_datasets = models.PDIDataset.objects.all()
+        pdi_datasets_names = [e.name for e in pdi_datasets]
+
+        parameter_combinations = []
+        for protein_interaction_dataset in ppi_datasets_names:
+            for pdi_dataset in pdi_datasets_names:
+                parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
+
+        # close all database connections so subprocesses will create their own connections
+        # this prevents the processes from running into problems because of using the same connection
+        db.connections.close_all()
+        with multiprocessing.Pool(KERNEL) as pool:
+            pool.map(create_gt, parameter_combinations)
diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py
index 476d60d..eba9987 100755
--- a/drugstone/management/commands/populate_db.py
+++ b/drugstone/management/commands/populate_db.py
@@ -159,10 +159,10 @@ def populate(kwargs):
         total_n += n
         print(f'Populated {n} PPIs from APID.')
 
-        print('Populating PPIs from BioGRID...')
-        n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
-        total_n += n
-        print(f'Populated {n} PPIs from BioGRID.')
+        # print('Populating PPIs from BioGRID...')
+        # n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
+        # total_n += n
+        # print(f'Populated {n} PPIs from BioGRID.')
 
     if kwargs['protein_drug']:
         print('Importing PDIs from NeDRexDB...')
@@ -182,10 +182,10 @@ def populate(kwargs):
         total_n += n
         print(f'Populated {n} PDIs from DGIdb.')
 
-        print('Populating PDIs from DrugBank...')
-        n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
-        total_n += n
-        print(f'Populated {n} PDIs from DrugBank.')
+        # print('Populating PDIs from DrugBank...')
+        # n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
+        # total_n += n
+        # print(f'Populated {n} PDIs from DrugBank.')
 
     if kwargs['protein_disorder']:
         print('Importing PDis from NeDRexDB...')
@@ -195,10 +195,10 @@ def populate(kwargs):
                                                                 update)
         total_n += n
         print(f'Imported {n} PDis from NeDRexDB')
-        print('Populating PDis associations from DisGeNET...')
-        n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
-        total_n += n
-        print(f'Populated {n} PDis associations from DisGeNET.')
+        # print('Populating PDis associations from DisGeNET...')
+        # n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
+        # total_n += n
+        # print(f'Populated {n} PDis associations from DisGeNET.')
 
     if kwargs['drug_disorder']:
         print('Importing DrDis from NeDRexDB...')
diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py
index f608ed1..27816cc 100644
--- a/drugstone/management/includes/DatasetLoader.py
+++ b/drugstone/management/includes/DatasetLoader.py
@@ -1,6 +1,9 @@
 from drugstone import models
 from python_nedrex.static import get_metadata
 
+ppi_nedrex_datasets = dict()
+
+
 def get_ppi_string():
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='STRING',
@@ -9,6 +12,7 @@ def get_ppi_string():
     )
     return dataset
 
+
 def get_ppi_apid():
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='APID',
@@ -17,6 +21,34 @@ def get_ppi_apid():
     )
     return dataset
 
+
+def get_ppi_nedrex_biogrid(url):
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='BioGRID',
+        link=url,
+        version=get_metadata()['source_databases']['biogrid']['date']
+    )
+    return dataset
+
+
+def get_ppi_nedrex_iid(url):
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='IID',
+        link=url,
+        version=get_metadata()['source_databases']['iid']['date']
+    )
+    return dataset
+
+
+def get_ppi_nedrex_intact(url):
+    dataset, _ = models.PPIDataset.objects.get_or_create(
+        name='IntAct',
+        link=url,
+        version=get_metadata()['source_databases']['intact']['date']
+    )
+    return dataset
+
+
 def get_ppi_biogrid():
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='BioGRID',
@@ -25,6 +57,7 @@ def get_ppi_biogrid():
     )
     return dataset
 
+
 def get_drug_target_nedrex(url):
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='NeDRex',
@@ -33,6 +66,7 @@ def get_drug_target_nedrex(url):
     )
     return dataset
 
+
 def get_ppi_nedrex(url):
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='NeDRex',
@@ -41,6 +75,7 @@ def get_ppi_nedrex(url):
     )
     return dataset
 
+
 def get_protein_disorder_nedrex(url):
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='NeDRex',
@@ -49,6 +84,7 @@ def get_protein_disorder_nedrex(url):
     )
     return dataset
 
+
 def get_drug_disorder_nedrex(url):
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='NeDRex',
@@ -57,6 +93,7 @@ def get_drug_disorder_nedrex(url):
     )
     return dataset
 
+
 def get_drug_target_chembl():
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='ChEMBL',
@@ -65,6 +102,7 @@ def get_drug_target_chembl():
     )
     return dataset
 
+
 def get_drug_target_dgidb():
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='DGIdb',
@@ -73,6 +111,7 @@ def get_drug_target_dgidb():
     )
     return dataset
 
+
 def get_drug_target_drugbank():
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='DrugBank',
@@ -81,6 +120,7 @@ def get_drug_target_drugbank():
     )
     return dataset
 
+
 def get_disorder_protein_disgenet():
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='DisGeNET',
@@ -97,3 +137,66 @@ def get_drug_disorder_drugbank():
         version='5.1.8',
     )
     return dataset
+
+
+def get_dis_prot_nedrex_disgenet(url):
+    dataset, _ = models.PDisDataset.objects.get_or_create(
+        name='DisGeNET',
+        link=url,
+        version=get_metadata()['source_databases']['disgenet']['date']
+    )
+    return dataset
+
+
+def get_dis_prot_nedrex_omim(url):
+    dataset, _ = models.PDisDataset.objects.get_or_create(
+        name='OMIM',
+        link=url,
+        version=get_metadata()['source_databases']['omim']['date']
+    )
+    return dataset
+
+
+def get_drdis_nedrex_drugcentral(url):
+    dataset, _ = models.DrDiDataset.objects.get_or_create(
+        name='Drug Central',
+        link=url,
+        version=get_metadata()['source_databases']['drug_central']['date']
+    )
+    return dataset
+
+def get_drdis_nedrex_ctd(url):
+    dataset, _ = models.DrDiDataset.objects.get_or_create(
+        name='CTD',
+        link=url,
+        version=get_metadata()['source_databases']['ctd']['date']
+    )
+    return dataset
+
+def get_pdr_nedrex_drugcentral(url):
+    dataset, _ = models.PDIDataset.objects.get_or_create(
+        name='Drug Central',
+        link=url,
+        version=get_metadata()['source_databases']['drug_central']['date']
+    )
+    return dataset
+
+def get_pdr_nedrex_drugbank(url):
+    dataset, _ = models.PDIDataset.objects.get_or_create(
+        name='DrugBank',
+        link=url,
+        version=get_metadata()['source_databases']['drugbank']['date']
+    )
+    return dataset
+
+def get_pdr_nedrex_datasets(url):
+    return {'drugbank': get_pdr_nedrex_drugbank(url), 'drug_central': get_pdr_nedrex_drugcentral(url)}
+
+def get_drdis_nedrex_datasets(url):
+    return {'ctd':get_drdis_nedrex_ctd(url), 'drug_central':get_drdis_nedrex_drugcentral(url)}
+
+def get_ppi_nedrex_datasets(url):
+    return {'biogrid':get_ppi_nedrex_biogrid(url), 'iid':get_ppi_nedrex_iid(url), 'intact':get_ppi_nedrex_intact(url)}
+
+def get_dis_prot_nedrex_datasets(url):
+    return {'disgenet': get_dis_prot_nedrex_disgenet(url), 'omim': get_dis_prot_nedrex_omim(url)}
\ No newline at end of file
diff --git a/drugstone/tasks.py b/drugstone/tasks.py
index 97c5ac7..d190741 100644
--- a/drugstone/tasks.py
+++ b/drugstone/tasks.py
@@ -1,7 +1,8 @@
+import subprocess
+
 from celery import shared_task
 from celery.utils.log import get_task_logger
 from drugstone.management.commands.populate_db import populate
-from drugstone.management.commands.make_graphs import run as make_graphs
 
 logger = get_task_logger(__name__)
 
@@ -15,7 +16,10 @@ def task_update_db_from_nedrex():
     logger.info('Updating data...')
     n = populate({"all": True, "update": True, "data_dir": data_dir})
     logger.info(f'Added {n} entries!')
-    if n > 0:
+    if 1 > 0:
         logger.info('Recreating networks...')
-        make_graphs()
+        proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs'])
+        out,err = proc.communicate()
+        print(out)
+        print(err)
     logger.info('Done.')
diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
index a525f29..f4c05b7 100755
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -5,15 +5,15 @@ file="store/docker-entrypoint.lock"
 
 
 
-if ! test -f "$file"; then
+#if ! test -f "$file"; then
 #    sh scripts/import-data.sh
     python3 manage.py makemigrations drugstone
     python3 manage.py migrate
     python3 manage.py createfixtures
     python3 manage.py cleanuptasks
-    python3 manage.py populate_db -u --all
-    python3 manage.py make_graphs
-    touch $file
-fi
+#    python3 manage.py populate_db -u --all
+#    python3 manage.py make_graphs
+#    touch $file
+#fi
 
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
diff --git a/scripts/start_celery_worker.sh b/scripts/start_celery_worker.sh
index bd9fa81..7f46185 100644
--- a/scripts/start_celery_worker.sh
+++ b/scripts/start_celery_worker.sh
@@ -1 +1,2 @@
+sleep 10
 celery -A drugstone worker -l INFO
\ No newline at end of file
-- 
GitLab