Skip to content
Snippets Groups Projects
Commit 3cdabc51 authored by AndiMajore
Browse files

fixed updating

Former-commit-id: 28e944a1
parent 639c1d18
No related branches found
No related tags found
No related merge requests found
......@@ -60,6 +60,8 @@ services:
image: drugstone_backend
container_name: drugstone_celery
hostname: drugstone_celery
volumes:
- drugstone_data_volume:/usr/src/drugstone/data
env_file:
- './docker-django.env.dev'
depends_on:
......
......@@ -112,10 +112,12 @@ class NedrexImporter:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
for u in updates:
u.save()
self.cache.proteins[u.uniprot_code] = u
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
self.cache.init_protein_maps()
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
......@@ -180,13 +182,18 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = set()
existing = set()
if update:
for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
existing.add(edge.__hash__())
def add_dpi(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......@@ -198,6 +205,10 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = list()
existing = set()
if update:
for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
existing.add(edge.__hash__())
def iter_ppi(eval):
from python_nedrex import ppi
......@@ -215,9 +226,9 @@ class NedrexImporter:
try:
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_protein=protein2))
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2)
if not update or e.__hash__() not in existing:
bulk.append(e)
except KeyError:
pass
......@@ -230,14 +241,19 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = set()
existing = set()
if update:
for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
existing.add(edge.__hash__())
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score']))
e = models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein, disorder=disorder,
score=edge['score'])
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......@@ -250,13 +266,18 @@ class NedrexImporter:
self.cache.init_drugs()
bulk = set()
existing = set()
if update:
for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
existing.add(edge.__hash__())
def add_drdis(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......
......@@ -12,8 +12,6 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -93,10 +91,10 @@ def populate(kwargs):
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
if 'clear' in kwargs and kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
if 'delete_model' in kwargs and kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
......@@ -105,7 +103,8 @@ def populate(kwargs):
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
total_n = 0
if 'all' in kwargs and kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
......@@ -118,24 +117,29 @@ def populate(kwargs):
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
total_n +=n
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
total_n += n
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
total_n += n
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update)
total_n += n
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
total_n += n
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
......@@ -143,17 +147,21 @@ def populate(kwargs):
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
total_n += n
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
total_n += n
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
total_n += n
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
......@@ -161,18 +169,22 @@ def populate(kwargs):
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
total_n += n
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
total_n += n
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
total_n += n
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
......@@ -181,9 +193,11 @@ def populate(kwargs):
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
total_n += n
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
......@@ -191,7 +205,12 @@ def populate(kwargs):
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
total_n += n
print(f'Populated {n} DrDi associations from DrugBank.')
cache.clear()
return total_n
......@@ -15,7 +15,8 @@ class DataPopulator:
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
tissue,_ = models.Tissue.objects.get_or_create(name=tissue_name)
tissues_models[tissue_name] = tissue
proteins_linked = 0
bulk = set()
......
......@@ -3,7 +3,6 @@ import drugstone.models as models
class NodeCache:
proteins = dict()
entrez_to_uniprot = defaultdict(lambda: set())
gene_name_to_uniprot = defaultdict(lambda: set())
......@@ -14,8 +13,21 @@ class NodeCache:
disorder_updates = set()
protein_updates = set()
def clear(self):
    """Empty every cached lookup table and update-tracking set.

    The containers are emptied in place instead of being replaced by new
    objects on the instance. The visible containers are declared on the
    class body and filled through in-place mutation, so rebinding fresh
    objects on ``self`` would leave the shared, populated containers
    behind; a cache object created afterwards would then still see the
    stale entries and skip reloading.
    """
    # NOTE(review): assumes all eight containers already exist as
    # attributes (the ones visible in this file are declared on the class
    # body) -- confirm for disorders / drugs / drug_updates.
    for container in (
        self.proteins,
        self.entrez_to_uniprot,
        self.gene_name_to_uniprot,
        self.disorders,
        self.drugs,
        self.drug_updates,
        self.disorder_updates,
        self.protein_updates,
    ):
        container.clear()
def init_protein_maps(self):
    """Rebuild the Entrez and gene-name lookup tables from ``self.proteins``.

    Each table maps a key (``protein.entrez`` or ``protein.gene``) to the
    set of ``uniprot_code`` values of the cached proteins carrying that
    key. Any previously built tables are discarded, so entries for
    proteins no longer in ``self.proteins`` do not survive a rebuild.
    """
    print("Generating protein id maps...")
    # defaultdict(set) keeps lookups of unknown keys returning an empty set.
    by_entrez = defaultdict(set)
    by_gene = defaultdict(set)
    for protein in self.proteins.values():
        code = protein.uniprot_code
        by_entrez[protein.entrez].add(code)
        by_gene[protein.gene].add(code)
    # Publish both tables only once they are fully built.
    self.entrez_to_uniprot = by_entrez
    self.gene_name_to_uniprot = by_gene
......@@ -24,9 +36,6 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
......@@ -35,33 +44,27 @@ class NodeCache:
if len(self.drugs) == 0:
print("Generating drug map...")
for drug in models.Drug.objects.all():
if drug.id < 1000:
drug.delete()
continue
self.drugs[drug.drug_id] = drug
def init_disorders(self):
if len(self.disorders) == 0:
print("Generating disorder map...")
for disorder in models.Disorder.objects.all():
if disorder.id < 1000:
disorder.delete()
continue
self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein:models.Protein):
def is_new_protein(self, protein: models.Protein):
return protein.uniprot_code in self.protein_updates
def is_new_drug(self, drug:models.Drug):
def is_new_drug(self, drug: models.Drug):
return drug.drug_id in self.drug_updates
def is_new_disease(self, disease:models.Disorder):
def is_new_disease(self, disease: models.Disorder):
return disease.mondo_id in self.disorder_updates
def get_protein_by_uniprot(self,uniprot_id):
def get_protein_by_uniprot(self, uniprot_id):
return self.proteins[uniprot_id]
def get_proteins_by_entrez(self,entrez_id):
def get_proteins_by_entrez(self, entrez_id):
out = list()
for g in self.entrez_to_uniprot[entrez_id]:
out.append(self.proteins[g])
......@@ -77,4 +80,4 @@ class NodeCache:
return self.drugs[drugbank_id]
def get_disorder_by_mondo(self, mondo_id):
return self.disorders[mondo_id]
\ No newline at end of file
return self.disorders[mondo_id]
......@@ -6,13 +6,16 @@ from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = "/usr/src/drugstone/data"
@shared_task
def task_update_db_from_nedrex():
logger.info('Updating DB from NeDRex.')
logger.info('Updating data...')
populate({"all": True, "update": True})
logger.info('Recreating networks...')
make_graphs()
n = populate({"all": True, "update": True, "data_dir": data_dir})
logger.info(f'Added {n} entries!')
if n > 0:
logger.info('Recreating networks...')
make_graphs()
logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
#if ! test -f "$file"; then
if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ file="store/docker-entrypoint.lock"
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
#fi
fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment