From e5962ad1ababbb9a289058e3a1c5871f061b9e71 Mon Sep 17 00:00:00 2001 From: AndiMajore <andi.majore@googlemail.com> Date: Tue, 5 Jul 2022 17:05:57 +0200 Subject: [PATCH] fixed make_graphs; added individual source datasets from nedrex Former-commit-id: 640d8907dc42bfb580198df3b9604522b7217ad7 [formerly 3bc93bec4b541af7214304f3ac9aea93432a419a] Former-commit-id: 3c2f33d7f54d45181aecdb86da82770b0e8d9674 --- .../management/commands/import_from_nedrex.py | 28 ++++- drugstone/management/commands/make_graphs.py | 35 +++--- drugstone/management/commands/populate_db.py | 24 ++-- .../management/includes/DatasetLoader.py | 103 ++++++++++++++++++ drugstone/tasks.py | 10 +- scripts/docker-entrypoint.sh | 10 +- scripts/start_celery_worker.sh | 1 + 7 files changed, 171 insertions(+), 40 deletions(-) diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py index 6f11679..7043e31 100644 --- a/drugstone/management/commands/import_from_nedrex.py +++ b/drugstone/management/commands/import_from_nedrex.py @@ -5,6 +5,7 @@ from python_nedrex.core import get_nodes, get_edges, get_api_key from drugstone import models from drugstone.management.includes.NodeCache import NodeCache +from drugstone.management.includes import DatasetLoader def iter_node_collection(coll_name, eval): @@ -57,10 +58,12 @@ def to_id(string): class NedrexImporter: cache: NodeCache = None + url: str = '' def __init__(self, base_url, cache: NodeCache): self.cache = cache nedrex.config.set_url_base(base_url) + self.url = base_url api_key = get_api_key(accept_eula=True) nedrex.config.set_api_key(api_key) @@ -187,6 +190,8 @@ class NedrexImporter: for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset): existing.add(edge.__hash__()) + source_datasets = DatasetLoader.get_pdr_nedrex_datasets(self.url) + def add_dpi(edge): try: drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId'])) @@ -194,6 +199,10 @@ class NedrexImporter: e = 
models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein) if not update or e.__hash__() not in existing: bulk.add(e) + for source in edge['assertedBy']: + bulk.add( + models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein)) + except KeyError: pass @@ -210,6 +219,8 @@ class NedrexImporter: for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset): existing.add(edge.__hash__()) + source_datasets = DatasetLoader.get_ppi_nedrex_datasets(self.url) + def iter_ppi(eval): from python_nedrex import ppi offset = 0 @@ -226,9 +237,13 @@ class NedrexImporter: try: protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne'])) protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo'])) - e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2) + e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2) if not update or e.__hash__() not in existing: bulk.append(e) + for source in edge['assertedBy']: + bulk.append( + models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1, + to_protein=protein2)) except KeyError: pass @@ -246,6 +261,8 @@ class NedrexImporter: for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset): existing.add(edge.__hash__()) + source_datasets = DatasetLoader.get_dis_prot_nedrex_datasets(self.url) + def add_pdis(edge): try: disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId'])) @@ -254,6 +271,10 @@ class NedrexImporter: score=edge['score']) if not update or e.__hash__() not in existing: bulk.add(e) + for source in edge['assertedBy']: + bulk.add( + models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder, + score=edge['score'])) except KeyError: pass @@ -271,6 +292,8 @@ class NedrexImporter: for edge in
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset): existing.add(edge.__hash__()) + source_datasets = DatasetLoader.get_drdis_nedrex_datasets(self.url) + def add_drdis(edge): try: drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId'])) @@ -278,6 +301,9 @@ class NedrexImporter: e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder) if not update or e.__hash__() not in existing: bulk.add(e) + for source in edge['assertedBy']: + bulk.add( + models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder)) except KeyError: pass diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py index e024555..a6c3d81 100755 --- a/drugstone/management/commands/make_graphs.py +++ b/drugstone/management/commands/make_graphs.py @@ -187,22 +187,19 @@ class Command(BaseCommand): pass def handle(self, *args, **kwargs): - run() - -def run(): - ppi_datasets = models.PPIDataset.objects.all() - ppi_datasets_names = [e.name for e in ppi_datasets] - - pdi_datasets = models.PDIDataset.objects.all() - pdi_datasets_names = [e.name for e in pdi_datasets] - - parameter_combinations = [] - for protein_interaction_dataset in ppi_datasets_names: - for pdi_dataset in pdi_datasets_names: - parameter_combinations.append((protein_interaction_dataset, pdi_dataset)) - - # close all database connections so subprocesses will create their own connections - # this prevents the processes from running into problems because of using the same connection - db.connections.close_all() - pool = multiprocessing.Pool(KERNEL) - pool.map(create_gt, parameter_combinations) \ No newline at end of file + ppi_datasets = models.PPIDataset.objects.all() + ppi_datasets_names = [e.name for e in ppi_datasets] + + pdi_datasets = models.PDIDataset.objects.all() + pdi_datasets_names = [e.name for e in pdi_datasets] + + parameter_combinations = [] + for protein_interaction_dataset in 
ppi_datasets_names: + for pdi_dataset in pdi_datasets_names: + parameter_combinations.append((protein_interaction_dataset, pdi_dataset)) + + # close all database connections so subprocesses will create their own connections + # this prevents the processes from running into problems because of using the same connection + db.connections.close_all() + pool = multiprocessing.Pool(KERNEL) + pool.map(create_gt, parameter_combinations) diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py index 476d60d..eba9987 100755 --- a/drugstone/management/commands/populate_db.py +++ b/drugstone/management/commands/populate_db.py @@ -159,10 +159,10 @@ def populate(kwargs): total_n += n print(f'Populated {n} PPIs from APID.') - print('Populating PPIs from BioGRID...') - n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update) - total_n += n - print(f'Populated {n} PPIs from BioGRID.') + # print('Populating PPIs from BioGRID...') + # n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update) + # total_n += n + # print(f'Populated {n} PPIs from BioGRID.') if kwargs['protein_drug']: print('Importing PDIs from NeDRexDB...') @@ -182,10 +182,10 @@ def populate(kwargs): total_n += n print(f'Populated {n} PDIs from DGIdb.') - print('Populating PDIs from DrugBank...') - n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update) - total_n += n - print(f'Populated {n} PDIs from DrugBank.') + # print('Populating PDIs from DrugBank...') + # n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update) + # total_n += n + # print(f'Populated {n} PDIs from DrugBank.') if kwargs['protein_disorder']: print('Importing PDis from NeDRexDB...') @@ -195,10 +195,10 @@ def populate(kwargs): update) total_n += n print(f'Imported {n} PDis from NeDRexDB') - print('Populating PDis associations from DisGeNET...') - n = 
DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update) - total_n += n - print(f'Populated {n} PDis associations from DisGeNET.') + # print('Populating PDis associations from DisGeNET...') + # n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update) + # total_n += n + # print(f'Populated {n} PDis associations from DisGeNET.') if kwargs['drug_disorder']: print('Importing DrDis from NeDRexDB...') diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py index f608ed1..27816cc 100644 --- a/drugstone/management/includes/DatasetLoader.py +++ b/drugstone/management/includes/DatasetLoader.py @@ -1,6 +1,9 @@ from drugstone import models from python_nedrex.static import get_metadata +ppi_nedrex_datasets = dict() + + def get_ppi_string(): dataset, _ = models.PPIDataset.objects.get_or_create( name='STRING', @@ -9,6 +12,7 @@ def get_ppi_string(): ) return dataset + def get_ppi_apid(): dataset, _ = models.PPIDataset.objects.get_or_create( name='APID', @@ -17,6 +21,34 @@ def get_ppi_apid(): ) return dataset + +def get_ppi_nedrex_biogrid(url): + dataset, _ = models.PPIDataset.objects.get_or_create( + name='BioGRID', + link=url, + version=get_metadata()['source_databases']['biogrid']['date'] + ) + return dataset + + +def get_ppi_nedrex_iid(url): + dataset, _ = models.PPIDataset.objects.get_or_create( + name='IID', + link=url, + version=get_metadata()['source_databases']['iid']['date'] + ) + return dataset + + +def get_ppi_nedrex_intact(url): + dataset, _ = models.PPIDataset.objects.get_or_create( + name='IntAct', + link=url, + version=get_metadata()['source_databases']['intact']['date'] + ) + return dataset + + def get_ppi_biogrid(): dataset, _ = models.PPIDataset.objects.get_or_create( name='BioGRID', @@ -25,6 +57,7 @@ def get_ppi_biogrid(): ) return dataset + def get_drug_target_nedrex(url): dataset, _ = 
models.PDIDataset.objects.get_or_create( name='NeDRex', @@ -33,6 +66,7 @@ def get_drug_target_nedrex(url): ) return dataset + def get_ppi_nedrex(url): dataset, _ = models.PPIDataset.objects.get_or_create( name='NeDRex', @@ -41,6 +75,7 @@ def get_ppi_nedrex(url): ) return dataset + def get_protein_disorder_nedrex(url): dataset, _ = models.PDisDataset.objects.get_or_create( name='NeDRex', @@ -49,6 +84,7 @@ def get_protein_disorder_nedrex(url): ) return dataset + def get_drug_disorder_nedrex(url): dataset, _ = models.DrDiDataset.objects.get_or_create( name='NeDRex', @@ -57,6 +93,7 @@ def get_drug_disorder_nedrex(url): ) return dataset + def get_drug_target_chembl(): dataset, _ = models.PDIDataset.objects.get_or_create( name='ChEMBL', @@ -65,6 +102,7 @@ def get_drug_target_chembl(): ) return dataset + def get_drug_target_dgidb(): dataset, _ = models.PDIDataset.objects.get_or_create( name='DGIdb', @@ -73,6 +111,7 @@ def get_drug_target_dgidb(): ) return dataset + def get_drug_target_drugbank(): dataset, _ = models.PDIDataset.objects.get_or_create( name='DrugBank', @@ -81,6 +120,7 @@ def get_drug_target_drugbank(): ) return dataset + def get_disorder_protein_disgenet(): dataset, _ = models.PDisDataset.objects.get_or_create( name='DisGeNET', @@ -97,3 +137,66 @@ def get_drug_disorder_drugbank(): version='5.1.8', ) return dataset + + +def get_dis_prot_nedrex_disgenet(url): + dataset, _ = models.PDisDataset.objects.get_or_create( + name='DisGeNET', + link=url, + version=get_metadata()['source_databases']['disgenet']['date'] + ) + return dataset + + +def get_dis_prot_nedrex_omim(url): + dataset, _ = models.PDisDataset.objects.get_or_create( + name='OMIM', + link=url, + version=get_metadata()['source_databases']['omim']['date'] + ) + return dataset + + +def get_drdis_nedrex_drugcentral(url): + dataset, _ = models.DrDiDataset.objects.get_or_create( + name='Drug Central', + link=url, + version=get_metadata()['source_databases']['drug_central']['date'] + ) + return dataset + +def 
get_drdis_nedrex_ctd(url): + dataset, _ = models.DrDiDataset.objects.get_or_create( + name='CTD', + link=url, + version=get_metadata()['source_databases']['ctd']['date'] + ) + return dataset + +def get_pdr_nedrex_drugcentral(url): + dataset, _ = models.PDIDataset.objects.get_or_create( + name='Drug Central', + link=url, + version=get_metadata()['source_databases']['drug_central']['date'] + ) + return dataset + +def get_pdr_nedrex_drugbank(url): + dataset, _ = models.PDIDataset.objects.get_or_create( + name='DrugBank', + link=url, + version=get_metadata()['source_databases']['drugbank']['date'] + ) + return dataset + +def get_pdr_nedrex_datasets(url): + return {'drugbank': get_pdr_nedrex_drugbank(url), 'drug_central': get_pdr_nedrex_drugcentral(url)} + +def get_drdis_nedrex_datasets(url): + return {'ctd':get_drdis_nedrex_ctd(url), 'drug_central':get_drdis_nedrex_drugcentral(url)} + +def get_ppi_nedrex_datasets(url): + return {'biogrid':get_ppi_nedrex_biogrid(url), 'iid':get_ppi_nedrex_iid(url), 'intact':get_ppi_nedrex_intact(url)} + +def get_dis_prot_nedrex_datasets(url): + return {'disgenet': get_dis_prot_nedrex_disgenet(url), 'omim': get_dis_prot_nedrex_omim(url)} \ No newline at end of file diff --git a/drugstone/tasks.py b/drugstone/tasks.py index 97c5ac7..d190741 100644 --- a/drugstone/tasks.py +++ b/drugstone/tasks.py @@ -1,7 +1,8 @@ +import subprocess + from celery import shared_task from celery.utils.log import get_task_logger from drugstone.management.commands.populate_db import populate -from drugstone.management.commands.make_graphs import run as make_graphs logger = get_task_logger(__name__) @@ -15,7 +16,10 @@ def task_update_db_from_nedrex(): logger.info('Updating data...') n = populate({"all": True, "update": True, "data_dir": data_dir}) logger.info(f'Added {n} entries!') - if n > 0: + if 1 > 0: logger.info('Recreating networks...') - make_graphs() + proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs']) + out,err = 
proc.communicate() + print(out) + print(err) logger.info('Done.') diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index a525f29..f4c05b7 100755 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -5,15 +5,15 @@ file="store/docker-entrypoint.lock" -if ! test -f "$file"; then +#if ! test -f "$file"; then # sh scripts/import-data.sh python3 manage.py makemigrations drugstone python3 manage.py migrate python3 manage.py createfixtures python3 manage.py cleanuptasks - python3 manage.py populate_db -u --all - python3 manage.py make_graphs - touch $file -fi +# python3 manage.py populate_db -u --all +# python3 manage.py make_graphs +# touch $file +#fi /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" diff --git a/scripts/start_celery_worker.sh b/scripts/start_celery_worker.sh index bd9fa81..7f46185 100644 --- a/scripts/start_celery_worker.sh +++ b/scripts/start_celery_worker.sh @@ -1 +1,2 @@ +sleep 10 celery -A drugstone worker -l INFO \ No newline at end of file -- GitLab