Skip to content
Snippets Groups Projects
Commit e5962ad1 authored by AndiMajore's avatar AndiMajore
Browse files

fixed make_graphs; added individual source datasets from nedrex

Former-commit-id: 640d8907dc42bfb580198df3b9604522b7217ad7 [formerly 3bc93bec4b541af7214304f3ac9aea93432a419a]
Former-commit-id: 3c2f33d7f54d45181aecdb86da82770b0e8d9674
parent 005a116f
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ from python_nedrex.core import get_nodes, get_edges, get_api_key ...@@ -5,6 +5,7 @@ from python_nedrex.core import get_nodes, get_edges, get_api_key
from drugstone import models from drugstone import models
from drugstone.management.includes.NodeCache import NodeCache from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
def iter_node_collection(coll_name, eval): def iter_node_collection(coll_name, eval):
...@@ -57,10 +58,12 @@ def to_id(string): ...@@ -57,10 +58,12 @@ def to_id(string):
class NedrexImporter: class NedrexImporter:
cache: NodeCache = None cache: NodeCache = None
url: str = ''
def __init__(self, base_url, cache: NodeCache): def __init__(self, base_url, cache: NodeCache):
self.cache = cache self.cache = cache
nedrex.config.set_url_base(base_url) nedrex.config.set_url_base(base_url)
self.url = base_url
api_key = get_api_key(accept_eula=True) api_key = get_api_key(accept_eula=True)
nedrex.config.set_api_key(api_key) nedrex.config.set_api_key(api_key)
...@@ -187,6 +190,8 @@ class NedrexImporter: ...@@ -187,6 +190,8 @@ class NedrexImporter:
for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset): for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
existing.add(edge.__hash__()) existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_pdr_nedrex_datasets(self.url)
def add_dpi(edge): def add_dpi(edge):
try: try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId'])) drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
...@@ -194,6 +199,10 @@ class NedrexImporter: ...@@ -194,6 +199,10 @@ class NedrexImporter:
e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein) e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.add(e) bulk.add(e)
for source in edge['assertedBy']:
bulk.add(
models.ProteinProteinInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein))
except KeyError: except KeyError:
pass pass
...@@ -210,6 +219,8 @@ class NedrexImporter: ...@@ -210,6 +219,8 @@ class NedrexImporter:
for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset): for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
existing.add(edge.__hash__()) existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_ppi_nedrex_datasets(self.url)
def iter_ppi(eval): def iter_ppi(eval):
from python_nedrex import ppi from python_nedrex import ppi
offset = 0 offset = 0
...@@ -226,9 +237,13 @@ class NedrexImporter: ...@@ -226,9 +237,13 @@ class NedrexImporter:
try: try:
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne'])) protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo'])) protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2) e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2)
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.append(e) bulk.append(e)
for source in edge['assertedBy']:
bulk.append(
models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1,
to_protein=protein2))
except KeyError: except KeyError:
pass pass
...@@ -246,6 +261,8 @@ class NedrexImporter: ...@@ -246,6 +261,8 @@ class NedrexImporter:
for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset): for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
existing.add(edge.__hash__()) existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_dis_prot_nedrex_datasets(self.url)
def add_pdis(edge): def add_pdis(edge):
try: try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId'])) disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
...@@ -254,6 +271,10 @@ class NedrexImporter: ...@@ -254,6 +271,10 @@ class NedrexImporter:
score=edge['score']) score=edge['score'])
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.add(e) bulk.add(e)
for source in edge['assertedBy']:
bulk.add(
models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder,
score=edge['score']))
except KeyError: except KeyError:
pass pass
...@@ -271,6 +292,8 @@ class NedrexImporter: ...@@ -271,6 +292,8 @@ class NedrexImporter:
for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset): for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
existing.add(edge.__hash__()) existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_drdis_nedrex_datasets(self.url)
def add_drdis(edge): def add_drdis(edge):
try: try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId'])) drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
...@@ -278,6 +301,9 @@ class NedrexImporter: ...@@ -278,6 +301,9 @@ class NedrexImporter:
e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder) e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.add(e) bulk.add(e)
for source in edge['assertedBy']:
bulk.add(
models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder))
except KeyError: except KeyError:
pass pass
......
...@@ -187,22 +187,19 @@ class Command(BaseCommand): ...@@ -187,22 +187,19 @@ class Command(BaseCommand):
pass pass
def handle(self, *args, **kwargs): def handle(self, *args, **kwargs):
run() ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets]
def run():
ppi_datasets = models.PPIDataset.objects.all() pdi_datasets = models.PDIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets] pdi_datasets_names = [e.name for e in pdi_datasets]
pdi_datasets = models.PDIDataset.objects.all() parameter_combinations = []
pdi_datasets_names = [e.name for e in pdi_datasets] for protein_interaction_dataset in ppi_datasets_names:
for pdi_dataset in pdi_datasets_names:
parameter_combinations = [] parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
for protein_interaction_dataset in ppi_datasets_names:
for pdi_dataset in pdi_datasets_names: # close all database connections so subprocesses will create their own connections
parameter_combinations.append((protein_interaction_dataset, pdi_dataset)) # this prevents the processes from running into problems because of using the same connection
db.connections.close_all()
# close all database connections so subprocesses will create their own connections pool = multiprocessing.Pool(KERNEL)
# this prevents the processes from running into problems because of using the same connection pool.map(create_gt, parameter_combinations)
db.connections.close_all()
pool = multiprocessing.Pool(KERNEL)
pool.map(create_gt, parameter_combinations)
\ No newline at end of file
...@@ -159,10 +159,10 @@ def populate(kwargs): ...@@ -159,10 +159,10 @@ def populate(kwargs):
total_n += n total_n += n
print(f'Populated {n} PPIs from APID.') print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...') # print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update) # n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
total_n += n # total_n += n
print(f'Populated {n} PPIs from BioGRID.') # print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']: if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...') print('Importing PDIs from NeDRexDB...')
...@@ -182,10 +182,10 @@ def populate(kwargs): ...@@ -182,10 +182,10 @@ def populate(kwargs):
total_n += n total_n += n
print(f'Populated {n} PDIs from DGIdb.') print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...') # print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update) # n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
total_n += n # total_n += n
print(f'Populated {n} PDIs from DrugBank.') # print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']: if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...') print('Importing PDis from NeDRexDB...')
...@@ -195,10 +195,10 @@ def populate(kwargs): ...@@ -195,10 +195,10 @@ def populate(kwargs):
update) update)
total_n += n total_n += n
print(f'Imported {n} PDis from NeDRexDB') print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...') # print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update) # n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
total_n += n # total_n += n
print(f'Populated {n} PDis associations from DisGeNET.') # print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']: if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...') print('Importing DrDis from NeDRexDB...')
......
from drugstone import models from drugstone import models
from python_nedrex.static import get_metadata from python_nedrex.static import get_metadata
ppi_nedrex_datasets = dict()
def get_ppi_string(): def get_ppi_string():
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='STRING', name='STRING',
...@@ -9,6 +12,7 @@ def get_ppi_string(): ...@@ -9,6 +12,7 @@ def get_ppi_string():
) )
return dataset return dataset
def get_ppi_apid(): def get_ppi_apid():
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='APID', name='APID',
...@@ -17,6 +21,34 @@ def get_ppi_apid(): ...@@ -17,6 +21,34 @@ def get_ppi_apid():
) )
return dataset return dataset
def get_ppi_nedrex_biogrid(url):
    """Fetch or create the PPIDataset entry for BioGRID as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PPIDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'biogrid' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['biogrid']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PPIDataset.objects.get_or_create(
        name='BioGRID', link=url, version=release_date)
    return record
def get_ppi_nedrex_iid(url):
    """Fetch or create the PPIDataset entry for IID as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PPIDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'iid' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['iid']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PPIDataset.objects.get_or_create(
        name='IID', link=url, version=release_date)
    return record
def get_ppi_nedrex_intact(url):
    """Fetch or create the PPIDataset entry for IntAct as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PPIDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'intact' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['intact']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PPIDataset.objects.get_or_create(
        name='IntAct', link=url, version=release_date)
    return record
def get_ppi_biogrid(): def get_ppi_biogrid():
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='BioGRID', name='BioGRID',
...@@ -25,6 +57,7 @@ def get_ppi_biogrid(): ...@@ -25,6 +57,7 @@ def get_ppi_biogrid():
) )
return dataset return dataset
def get_drug_target_nedrex(url): def get_drug_target_nedrex(url):
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='NeDRex', name='NeDRex',
...@@ -33,6 +66,7 @@ def get_drug_target_nedrex(url): ...@@ -33,6 +66,7 @@ def get_drug_target_nedrex(url):
) )
return dataset return dataset
def get_ppi_nedrex(url): def get_ppi_nedrex(url):
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='NeDRex', name='NeDRex',
...@@ -41,6 +75,7 @@ def get_ppi_nedrex(url): ...@@ -41,6 +75,7 @@ def get_ppi_nedrex(url):
) )
return dataset return dataset
def get_protein_disorder_nedrex(url): def get_protein_disorder_nedrex(url):
dataset, _ = models.PDisDataset.objects.get_or_create( dataset, _ = models.PDisDataset.objects.get_or_create(
name='NeDRex', name='NeDRex',
...@@ -49,6 +84,7 @@ def get_protein_disorder_nedrex(url): ...@@ -49,6 +84,7 @@ def get_protein_disorder_nedrex(url):
) )
return dataset return dataset
def get_drug_disorder_nedrex(url): def get_drug_disorder_nedrex(url):
dataset, _ = models.DrDiDataset.objects.get_or_create( dataset, _ = models.DrDiDataset.objects.get_or_create(
name='NeDRex', name='NeDRex',
...@@ -57,6 +93,7 @@ def get_drug_disorder_nedrex(url): ...@@ -57,6 +93,7 @@ def get_drug_disorder_nedrex(url):
) )
return dataset return dataset
def get_drug_target_chembl(): def get_drug_target_chembl():
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='ChEMBL', name='ChEMBL',
...@@ -65,6 +102,7 @@ def get_drug_target_chembl(): ...@@ -65,6 +102,7 @@ def get_drug_target_chembl():
) )
return dataset return dataset
def get_drug_target_dgidb(): def get_drug_target_dgidb():
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='DGIdb', name='DGIdb',
...@@ -73,6 +111,7 @@ def get_drug_target_dgidb(): ...@@ -73,6 +111,7 @@ def get_drug_target_dgidb():
) )
return dataset return dataset
def get_drug_target_drugbank(): def get_drug_target_drugbank():
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='DrugBank', name='DrugBank',
...@@ -81,6 +120,7 @@ def get_drug_target_drugbank(): ...@@ -81,6 +120,7 @@ def get_drug_target_drugbank():
) )
return dataset return dataset
def get_disorder_protein_disgenet(): def get_disorder_protein_disgenet():
dataset, _ = models.PDisDataset.objects.get_or_create( dataset, _ = models.PDisDataset.objects.get_or_create(
name='DisGeNET', name='DisGeNET',
...@@ -97,3 +137,66 @@ def get_drug_disorder_drugbank(): ...@@ -97,3 +137,66 @@ def get_drug_disorder_drugbank():
version='5.1.8', version='5.1.8',
) )
return dataset return dataset
def get_dis_prot_nedrex_disgenet(url):
    """Fetch or create the PDisDataset entry for DisGeNET as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PDisDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'disgenet' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['disgenet']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PDisDataset.objects.get_or_create(
        name='DisGeNET', link=url, version=release_date)
    return record
def get_dis_prot_nedrex_omim(url):
    """Fetch or create the PDisDataset entry for OMIM as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PDisDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'omim' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['omim']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PDisDataset.objects.get_or_create(
        name='OMIM', link=url, version=release_date)
    return record
def get_drdis_nedrex_drugcentral(url):
    """Fetch or create the DrDiDataset entry for Drug Central as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.DrDiDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'drug_central' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['drug_central']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.DrDiDataset.objects.get_or_create(
        name='Drug Central', link=url, version=release_date)
    return record
def get_drdis_nedrex_ctd(url):
    """Fetch or create the DrDiDataset entry for CTD as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.DrDiDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'ctd' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['ctd']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.DrDiDataset.objects.get_or_create(
        name='CTD', link=url, version=release_date)
    return record
def get_pdr_nedrex_drugcentral(url):
    """Fetch or create the PDIDataset entry for Drug Central as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PDIDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'drug_central' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['drug_central']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PDIDataset.objects.get_or_create(
        name='Drug Central', link=url, version=release_date)
    return record
def get_pdr_nedrex_drugbank(url):
    """Fetch or create the PDIDataset entry for DrugBank as provided via NeDRex.

    :param url: value stored in the dataset's ``link`` field.
    :return: the matching ``models.PDIDataset`` instance (created if absent).

    The ``version`` field is the 'date' that ``get_metadata()`` reports for
    the 'drugbank' entry under 'source_databases'.
    """
    release_date = get_metadata()['source_databases']['drugbank']['date']
    # get_or_create yields (instance, created); only the instance is needed.
    record, _created = models.PDIDataset.objects.get_or_create(
        name='DrugBank', link=url, version=release_date)
    return record
def get_pdr_nedrex_datasets(url):
    """Collect the per-source protein-drug datasets, keyed by source name.

    :param url: forwarded unchanged to each per-source loader.
    :return: dict with keys 'drugbank' and 'drug_central' mapping to the
        datasets returned by the corresponding ``get_pdr_nedrex_*`` loader.
    """
    by_source = {}
    # Loaders run in the same order as the keys are listed.
    by_source['drugbank'] = get_pdr_nedrex_drugbank(url)
    by_source['drug_central'] = get_pdr_nedrex_drugcentral(url)
    return by_source
def get_drdis_nedrex_datasets(url):
    """Collect the per-source drug-disorder datasets, keyed by source name.

    :param url: forwarded unchanged to each per-source loader.
    :return: dict with keys 'ctd' and 'drug_central' mapping to the
        datasets returned by the corresponding ``get_drdis_nedrex_*`` loader.
    """
    # Keyword arguments are evaluated left to right, so CTD is loaded first.
    return dict(
        ctd=get_drdis_nedrex_ctd(url),
        drug_central=get_drdis_nedrex_drugcentral(url),
    )
def get_ppi_nedrex_datasets(url):
    """Collect the per-source protein-protein datasets, keyed by source name.

    :param url: forwarded unchanged to each per-source loader.
    :return: dict with keys 'biogrid', 'iid' and 'intact' mapping to the
        datasets returned by the corresponding ``get_ppi_nedrex_*`` loader.
    """
    by_source = {}
    # Loaders run in the same order as the keys are listed.
    by_source['biogrid'] = get_ppi_nedrex_biogrid(url)
    by_source['iid'] = get_ppi_nedrex_iid(url)
    by_source['intact'] = get_ppi_nedrex_intact(url)
    return by_source
def get_dis_prot_nedrex_datasets(url):
    """Collect the per-source protein-disorder datasets, keyed by source name.

    :param url: forwarded unchanged to each per-source loader.
    :return: dict with keys 'disgenet' and 'omim' mapping to the datasets
        returned by the corresponding ``get_dis_prot_nedrex_*`` loader.
    """
    # Keyword arguments are evaluated left to right, so DisGeNET is loaded first.
    return dict(
        disgenet=get_dis_prot_nedrex_disgenet(url),
        omim=get_dis_prot_nedrex_omim(url),
    )
\ No newline at end of file
import subprocess
from celery import shared_task from celery import shared_task
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from drugstone.management.commands.populate_db import populate from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
...@@ -15,7 +16,10 @@ def task_update_db_from_nedrex(): ...@@ -15,7 +16,10 @@ def task_update_db_from_nedrex():
logger.info('Updating data...') logger.info('Updating data...')
n = populate({"all": True, "update": True, "data_dir": data_dir}) n = populate({"all": True, "update": True, "data_dir": data_dir})
logger.info(f'Added {n} entries!') logger.info(f'Added {n} entries!')
if n > 0: if 1 > 0:
logger.info('Recreating networks...') logger.info('Recreating networks...')
make_graphs() proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs'])
out,err = proc.communicate()
print(out)
print(err)
logger.info('Done.') logger.info('Done.')
...@@ -5,15 +5,15 @@ file="store/docker-entrypoint.lock" ...@@ -5,15 +5,15 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then #if ! test -f "$file"; then
# sh scripts/import-data.sh # sh scripts/import-data.sh
python3 manage.py makemigrations drugstone python3 manage.py makemigrations drugstone
python3 manage.py migrate python3 manage.py migrate
python3 manage.py createfixtures python3 manage.py createfixtures
python3 manage.py cleanuptasks python3 manage.py cleanuptasks
python3 manage.py populate_db -u --all # python3 manage.py populate_db -u --all
python3 manage.py make_graphs # python3 manage.py make_graphs
touch $file # touch $file
fi #fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
sleep 10
celery -A drugstone worker -l INFO celery -A drugstone worker -l INFO
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment