Skip to content
Snippets Groups Projects
Commit 28e944a1 authored by AndiMajore
Browse files

fixed updating

parent 9c065a99
No related branches found
No related tags found
No related merge requests found
......@@ -60,6 +60,8 @@ services:
image: drugstone_backend
container_name: drugstone_celery
hostname: drugstone_celery
volumes:
- drugstone_data_volume:/usr/src/drugstone/data
env_file:
- './docker-django.env.dev'
depends_on:
......
......@@ -112,10 +112,12 @@ class NedrexImporter:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
for u in updates:
u.save()
self.cache.proteins[u.uniprot_code] = u
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
self.cache.init_protein_maps()
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
......@@ -180,13 +182,18 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = set()
existing = set()
if update:
for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
existing.add(edge.__hash__())
def add_dpi(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......@@ -198,6 +205,10 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = list()
existing = set()
if update:
for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
existing.add(edge.__hash__())
def iter_ppi(eval):
from python_nedrex import ppi
......@@ -215,9 +226,9 @@ class NedrexImporter:
try:
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_protein=protein2))
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2)
if not update or e.__hash__() not in existing:
bulk.append(e)
except KeyError:
pass
......@@ -230,14 +241,19 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = set()
existing = set()
if update:
for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
existing.add(edge.__hash__())
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score']))
e = models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein, disorder=disorder,
score=edge['score'])
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......@@ -250,13 +266,18 @@ class NedrexImporter:
self.cache.init_drugs()
bulk = set()
existing = set()
if update:
for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
existing.add(edge.__hash__())
def add_drdis(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......
......@@ -12,8 +12,6 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -93,10 +91,10 @@ def populate(kwargs):
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
if 'clear' in kwargs and kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
if 'delete_model' in kwargs and kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
......@@ -105,7 +103,8 @@ def populate(kwargs):
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
total_n = 0
if 'all' in kwargs and kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
......@@ -118,24 +117,29 @@ def populate(kwargs):
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
total_n +=n
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
total_n += n
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
total_n += n
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update)
total_n += n
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
total_n += n
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
......@@ -143,17 +147,21 @@ def populate(kwargs):
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
total_n += n
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
total_n += n
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
total_n += n
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
......@@ -161,18 +169,22 @@ def populate(kwargs):
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
total_n += n
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
total_n += n
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
total_n += n
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
......@@ -181,9 +193,11 @@ def populate(kwargs):
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
total_n += n
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
......@@ -191,7 +205,12 @@ def populate(kwargs):
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
total_n += n
print(f'Populated {n} DrDi associations from DrugBank.')
cache.clear()
return total_n
......@@ -15,7 +15,8 @@ class DataPopulator:
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
tissue,_ = models.Tissue.objects.get_or_create(name=tissue_name)
tissues_models[tissue_name] = tissue
proteins_linked = 0
bulk = set()
......
......@@ -3,7 +3,6 @@ import drugstone.models as models
class NodeCache:
proteins = dict()
entrez_to_uniprot = defaultdict(lambda: set())
gene_name_to_uniprot = defaultdict(lambda: set())
......@@ -14,8 +13,21 @@ class NodeCache:
disorder_updates = set()
protein_updates = set()
def clear(self):
    """Reset every cached lookup table and update-tracking set to empty.

    Fresh containers are assigned on the instance, so the objects the
    attributes previously referred to are not mutated; they are simply no
    longer referenced by this cache.
    """
    # Primary node tables (filled by the init_* methods of this class).
    self.proteins = {}
    self.disorders = {}
    self.drugs = {}
    # Secondary protein indexes; a missing key yields an empty set.
    self.entrez_to_uniprot = defaultdict(set)
    self.gene_name_to_uniprot = defaultdict(set)
    # Identifier sets consulted by the is_new_* checks.
    self.drug_updates = set()
    self.disorder_updates = set()
    self.protein_updates = set()
def init_protein_maps(self):
    """Rebuild the Entrez-ID and gene-name indexes from ``self.proteins``.

    Both indexes are recreated from scratch, so entries left over from an
    earlier build are dropped. Each index maps a key (``protein.entrez`` or
    ``protein.gene``) to the set of UniProt codes of the cached proteins
    that carry that key.
    """
    print("Generating protein id maps...")
    self.entrez_to_uniprot = defaultdict(set)
    self.gene_name_to_uniprot = defaultdict(set)
    for protein in self.proteins.values():
        code = protein.uniprot_code
        self.entrez_to_uniprot[protein.entrez].add(code)
        self.gene_name_to_uniprot[protein.gene].add(code)
......@@ -24,9 +36,6 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
......@@ -35,33 +44,27 @@ class NodeCache:
if len(self.drugs) == 0:
print("Generating drug map...")
for drug in models.Drug.objects.all():
if drug.id < 1000:
drug.delete()
continue
self.drugs[drug.drug_id] = drug
def init_disorders(self):
if len(self.disorders) == 0:
print("Generating disorder map...")
for disorder in models.Disorder.objects.all():
if disorder.id < 1000:
disorder.delete()
continue
self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein) -> bool:
    """Return True if ``protein.uniprot_code`` is in ``self.protein_updates``."""
    return protein.uniprot_code in self.protein_updates
def is_new_drug(self, drug: models.Drug) -> bool:
    """Return True if ``drug.drug_id`` is in ``self.drug_updates``."""
    return drug.drug_id in self.drug_updates
def is_new_disease(self, disease: models.Disorder) -> bool:
    """Return True if ``disease.mondo_id`` is in ``self.disorder_updates``."""
    return disease.mondo_id in self.disorder_updates
def get_protein_by_uniprot(self, uniprot_id):
    """Return the cached protein stored under ``uniprot_id``.

    Raises:
        KeyError: if no protein with that UniProt code is in
            ``self.proteins``.
    """
    return self.proteins[uniprot_id]
def get_proteins_by_entrez(self,entrez_id):
def get_proteins_by_entrez(self, entrez_id):
out = list()
for g in self.entrez_to_uniprot[entrez_id]:
out.append(self.proteins[g])
......@@ -77,4 +80,4 @@ class NodeCache:
return self.drugs[drugbank_id]
def get_disorder_by_mondo(self, mondo_id):
    """Return the cached disorder stored under ``mondo_id``.

    Raises:
        KeyError: if no disorder with that MONDO id is in
            ``self.disorders``.
    """
    return self.disorders[mondo_id]
......@@ -6,13 +6,16 @@ from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = "/usr/src/drugstone/data"
@shared_task
def task_update_db_from_nedrex():
logger.info('Updating DB from NeDRex.')
logger.info('Updating data...')
populate({"all": True, "update": True})
logger.info('Recreating networks...')
make_graphs()
n = populate({"all": True, "update": True, "data_dir": data_dir})
logger.info(f'Added {n} entries!')
if n > 0:
logger.info('Recreating networks...')
make_graphs()
logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
#if ! test -f "$file"; then
if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ file="store/docker-entrypoint.lock"
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
#fi
fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment