Skip to content
Snippets Groups Projects
Commit e5962ad1 authored by AndiMajore's avatar AndiMajore
Browse files

fixed make_graphs; added individual source datasets from nedrex

Former-commit-id: 640d8907dc42bfb580198df3b9604522b7217ad7 [formerly 3bc93bec4b541af7214304f3ac9aea93432a419a]
Former-commit-id: 3c2f33d7f54d45181aecdb86da82770b0e8d9674
parent 005a116f
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ from python_nedrex.core import get_nodes, get_edges, get_api_key
from drugstone import models
from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
def iter_node_collection(coll_name, eval):
......@@ -57,10 +58,12 @@ def to_id(string):
class NedrexImporter:
cache: NodeCache = None
url: str = ''
def __init__(self, base_url, cache: NodeCache):
self.cache = cache
nedrex.config.set_url_base(base_url)
self.url = base_url
api_key = get_api_key(accept_eula=True)
nedrex.config.set_api_key(api_key)
......@@ -187,6 +190,8 @@ class NedrexImporter:
for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_pdr_nedrex_datasets(self.url)
def add_dpi(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
......@@ -194,6 +199,10 @@ class NedrexImporter:
e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
if not update or e.__hash__() not in existing:
bulk.add(e)
# Additionally store the interaction once per asserting source database.
# source_datasets maps source keys to PDI dataset records, so the row must be a
# ProteinDrugInteraction (same keywords as `e` above), not a ProteinProteinInteraction.
for source in edge['assertedBy']:
    bulk.add(
        models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein))
except KeyError:
pass
......@@ -210,6 +219,8 @@ class NedrexImporter:
for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_ppi_nedrex_datasets(self.url)
def iter_ppi(eval):
from python_nedrex import ppi
offset = 0
......@@ -226,9 +237,13 @@ class NedrexImporter:
try:
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2)
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2)
if not update or e.__hash__() not in existing:
bulk.append(e)
for source in edge['assertedBy']:
bulk.append(
models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1,
to_protein=protein2))
except KeyError:
pass
......@@ -246,6 +261,8 @@ class NedrexImporter:
for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_dis_prot_nedrex_datasets(self.url)
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
......@@ -254,6 +271,10 @@ class NedrexImporter:
score=edge['score'])
if not update or e.__hash__() not in existing:
bulk.add(e)
for source in edge['assertedBy']:
bulk.add(
models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder,
score=edge['score']))
except KeyError:
pass
......@@ -271,6 +292,8 @@ class NedrexImporter:
for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
existing.add(edge.__hash__())
source_datasets = DatasetLoader.get_drdis_nedrex_datasets(self.url)
def add_drdis(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
......@@ -278,6 +301,9 @@ class NedrexImporter:
e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
if not update or e.__hash__() not in existing:
bulk.add(e)
for source in edge['assertedBy']:
bulk.add(
models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder))
except KeyError:
pass
......
......@@ -187,22 +187,19 @@ class Command(BaseCommand):
pass
def handle(self, *args, **kwargs):
run()
def run():
ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets]
pdi_datasets = models.PDIDataset.objects.all()
pdi_datasets_names = [e.name for e in pdi_datasets]
parameter_combinations = []
for protein_interaction_dataset in ppi_datasets_names:
for pdi_dataset in pdi_datasets_names:
parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
# close all database connections so subprocesses will create their own connections
# this prevents the processes from running into problems because of using the same connection
db.connections.close_all()
pool = multiprocessing.Pool(KERNEL)
pool.map(create_gt, parameter_combinations)
\ No newline at end of file
ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets]
pdi_datasets = models.PDIDataset.objects.all()
pdi_datasets_names = [e.name for e in pdi_datasets]
parameter_combinations = []
for protein_interaction_dataset in ppi_datasets_names:
for pdi_dataset in pdi_datasets_names:
parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
# close all database connections so subprocesses will create their own connections
# this prevents the processes from running into problems because of using the same connection
db.connections.close_all()
pool = multiprocessing.Pool(KERNEL)
pool.map(create_gt, parameter_combinations)
......@@ -159,10 +159,10 @@ def populate(kwargs):
total_n += n
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
total_n += n
print(f'Populated {n} PPIs from BioGRID.')
# print('Populating PPIs from BioGRID...')
# n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
# total_n += n
# print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
......@@ -182,10 +182,10 @@ def populate(kwargs):
total_n += n
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
total_n += n
print(f'Populated {n} PDIs from DrugBank.')
# print('Populating PDIs from DrugBank...')
# n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
# total_n += n
# print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
......@@ -195,10 +195,10 @@ def populate(kwargs):
update)
total_n += n
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
total_n += n
print(f'Populated {n} PDis associations from DisGeNET.')
# print('Populating PDis associations from DisGeNET...')
# n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
# total_n += n
# print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
......
from drugstone import models
from python_nedrex.static import get_metadata
ppi_nedrex_datasets = dict()
def get_ppi_string():
dataset, _ = models.PPIDataset.objects.get_or_create(
name='STRING',
......@@ -9,6 +12,7 @@ def get_ppi_string():
)
return dataset
def get_ppi_apid():
dataset, _ = models.PPIDataset.objects.get_or_create(
name='APID',
......@@ -17,6 +21,34 @@ def get_ppi_apid():
)
return dataset
def get_ppi_nedrex_biogrid(url):
    """Return the PPI dataset record named 'BioGRID' linked to ``url``, creating it if absent.

    The version is the 'date' entry that get_metadata() reports for the
    'biogrid' source database.
    """
    release = get_metadata()['source_databases']['biogrid']['date']
    record, _created = models.PPIDataset.objects.get_or_create(
        name='BioGRID',
        link=url,
        version=release,
    )
    return record
def get_ppi_nedrex_iid(url):
    """Return the PPI dataset record named 'IID' linked to ``url``, creating it if absent.

    The version is the 'date' entry that get_metadata() reports for the
    'iid' source database.
    """
    release = get_metadata()['source_databases']['iid']['date']
    record, _created = models.PPIDataset.objects.get_or_create(
        name='IID',
        link=url,
        version=release,
    )
    return record
def get_ppi_nedrex_intact(url):
    """Return the PPI dataset record named 'IntAct' linked to ``url``, creating it if absent.

    The version is the 'date' entry that get_metadata() reports for the
    'intact' source database.
    """
    release = get_metadata()['source_databases']['intact']['date']
    record, _created = models.PPIDataset.objects.get_or_create(
        name='IntAct',
        link=url,
        version=release,
    )
    return record
def get_ppi_biogrid():
dataset, _ = models.PPIDataset.objects.get_or_create(
name='BioGRID',
......@@ -25,6 +57,7 @@ def get_ppi_biogrid():
)
return dataset
def get_drug_target_nedrex(url):
dataset, _ = models.PDIDataset.objects.get_or_create(
name='NeDRex',
......@@ -33,6 +66,7 @@ def get_drug_target_nedrex(url):
)
return dataset
def get_ppi_nedrex(url):
dataset, _ = models.PPIDataset.objects.get_or_create(
name='NeDRex',
......@@ -41,6 +75,7 @@ def get_ppi_nedrex(url):
)
return dataset
def get_protein_disorder_nedrex(url):
dataset, _ = models.PDisDataset.objects.get_or_create(
name='NeDRex',
......@@ -49,6 +84,7 @@ def get_protein_disorder_nedrex(url):
)
return dataset
def get_drug_disorder_nedrex(url):
dataset, _ = models.DrDiDataset.objects.get_or_create(
name='NeDRex',
......@@ -57,6 +93,7 @@ def get_drug_disorder_nedrex(url):
)
return dataset
def get_drug_target_chembl():
dataset, _ = models.PDIDataset.objects.get_or_create(
name='ChEMBL',
......@@ -65,6 +102,7 @@ def get_drug_target_chembl():
)
return dataset
def get_drug_target_dgidb():
dataset, _ = models.PDIDataset.objects.get_or_create(
name='DGIdb',
......@@ -73,6 +111,7 @@ def get_drug_target_dgidb():
)
return dataset
def get_drug_target_drugbank():
dataset, _ = models.PDIDataset.objects.get_or_create(
name='DrugBank',
......@@ -81,6 +120,7 @@ def get_drug_target_drugbank():
)
return dataset
def get_disorder_protein_disgenet():
dataset, _ = models.PDisDataset.objects.get_or_create(
name='DisGeNET',
......@@ -97,3 +137,66 @@ def get_drug_disorder_drugbank():
version='5.1.8',
)
return dataset
def get_dis_prot_nedrex_disgenet(url):
    """Return the protein-disorder dataset record named 'DisGeNET' linked to ``url``.

    The record is created if it does not exist yet; its version is the 'date'
    entry that get_metadata() reports for the 'disgenet' source database.
    """
    release = get_metadata()['source_databases']['disgenet']['date']
    record, _created = models.PDisDataset.objects.get_or_create(
        name='DisGeNET',
        link=url,
        version=release,
    )
    return record
def get_dis_prot_nedrex_omim(url):
    """Return the protein-disorder dataset record named 'OMIM' linked to ``url``.

    The record is created if it does not exist yet; its version is the 'date'
    entry that get_metadata() reports for the 'omim' source database.
    """
    release = get_metadata()['source_databases']['omim']['date']
    record, _created = models.PDisDataset.objects.get_or_create(
        name='OMIM',
        link=url,
        version=release,
    )
    return record
def get_drdis_nedrex_drugcentral(url):
    """Return the drug-disorder dataset record named 'Drug Central' linked to ``url``.

    The record is created if it does not exist yet; its version is the 'date'
    entry that get_metadata() reports for the 'drug_central' source database.
    """
    release = get_metadata()['source_databases']['drug_central']['date']
    record, _created = models.DrDiDataset.objects.get_or_create(
        name='Drug Central',
        link=url,
        version=release,
    )
    return record
def get_drdis_nedrex_ctd(url):
    """Return the drug-disorder dataset record named 'CTD' linked to ``url``.

    The record is created if it does not exist yet; its version is the 'date'
    entry that get_metadata() reports for the 'ctd' source database.
    """
    release = get_metadata()['source_databases']['ctd']['date']
    record, _created = models.DrDiDataset.objects.get_or_create(
        name='CTD',
        link=url,
        version=release,
    )
    return record
def get_pdr_nedrex_drugcentral(url):
    """Return the protein-drug dataset record named 'Drug Central' linked to ``url``.

    The record is created if it does not exist yet; its version is the 'date'
    entry that get_metadata() reports for the 'drug_central' source database.
    """
    release = get_metadata()['source_databases']['drug_central']['date']
    record, _created = models.PDIDataset.objects.get_or_create(
        name='Drug Central',
        link=url,
        version=release,
    )
    return record
def get_pdr_nedrex_drugbank(url):
    """Return the protein-drug dataset record named 'DrugBank' linked to ``url``.

    The record is created if it does not exist yet; its version is the 'date'
    entry that get_metadata() reports for the 'drugbank' source database.
    """
    release = get_metadata()['source_databases']['drugbank']['date']
    record, _created = models.PDIDataset.objects.get_or_create(
        name='DrugBank',
        link=url,
        version=release,
    )
    return record
def get_pdr_nedrex_datasets(url):
    """Build a dict from source key ('drugbank', 'drug_central') to its protein-drug dataset record."""
    drugbank = get_pdr_nedrex_drugbank(url)
    drug_central = get_pdr_nedrex_drugcentral(url)
    return {
        'drugbank': drugbank,
        'drug_central': drug_central,
    }
def get_drdis_nedrex_datasets(url):
    """Build a dict from source key ('ctd', 'drug_central') to its drug-disorder dataset record."""
    ctd = get_drdis_nedrex_ctd(url)
    drug_central = get_drdis_nedrex_drugcentral(url)
    return {
        'ctd': ctd,
        'drug_central': drug_central,
    }
def get_ppi_nedrex_datasets(url):
    """Build a dict from source key ('biogrid', 'iid', 'intact') to its PPI dataset record."""
    biogrid = get_ppi_nedrex_biogrid(url)
    iid = get_ppi_nedrex_iid(url)
    intact = get_ppi_nedrex_intact(url)
    return {
        'biogrid': biogrid,
        'iid': iid,
        'intact': intact,
    }
def get_dis_prot_nedrex_datasets(url):
    """Build a dict from source key ('disgenet', 'omim') to its protein-disorder dataset record."""
    disgenet = get_dis_prot_nedrex_disgenet(url)
    omim = get_dis_prot_nedrex_omim(url)
    return {
        'disgenet': disgenet,
        'omim': omim,
    }
\ No newline at end of file
import subprocess
from celery import shared_task
from celery.utils.log import get_task_logger
from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
......@@ -15,7 +16,10 @@ def task_update_db_from_nedrex():
logger.info('Updating data...')
n = populate({"all": True, "update": True, "data_dir": data_dir})
logger.info(f'Added {n} entries!')
if n > 0:
if 1 > 0:
logger.info('Recreating networks...')
make_graphs()
proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs'])
out,err = proc.communicate()
print(out)
print(err)
logger.info('Done.')
......@@ -5,15 +5,15 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then
#if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
python3 manage.py createfixtures
python3 manage.py cleanuptasks
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
fi
# python3 manage.py populate_db -u --all
# python3 manage.py make_graphs
# touch $file
#fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
sleep 10
celery -A drugstone worker -l INFO
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment