Skip to content
Snippets Groups Projects
Commit 3e5802e1 authored by AndiMajore
Browse files

fixed updating

parent 1602a919
No related branches found
No related tags found
No related merge requests found
Pipeline #12006 failed
......@@ -60,6 +60,8 @@ services:
image: drugstone_backend
container_name: drugstone_celery
hostname: drugstone_celery
volumes:
- drugstone_data_volume:/usr/src/drugstone/data
env_file:
- './docker-django.env.dev'
depends_on:
......
......@@ -112,10 +112,12 @@ class NedrexImporter:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
for u in updates:
u.save()
self.cache.proteins[u.uniprot_code] = u
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
self.cache.init_protein_maps()
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
......@@ -180,13 +182,18 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = set()
existing = set()
if update:
for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
existing.add(edge.__hash__())
def add_dpi(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......@@ -198,6 +205,10 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = list()
existing = set()
if update:
for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
existing.add(edge.__hash__())
def iter_ppi(eval):
from python_nedrex import ppi
......@@ -215,9 +226,9 @@ class NedrexImporter:
try:
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_protein=protein2))
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2)
if not update or e.__hash__() not in existing:
bulk.append(e)
except KeyError:
pass
......@@ -230,14 +241,19 @@ class NedrexImporter:
self.cache.init_proteins()
bulk = set()
existing = set()
if update:
for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
existing.add(edge.__hash__())
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score']))
e = models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein, disorder=disorder,
score=edge['score'])
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......@@ -250,13 +266,18 @@ class NedrexImporter:
self.cache.init_drugs()
bulk = set()
existing = set()
if update:
for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
existing.add(edge.__hash__())
def add_drdis(edge):
try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError:
pass
......
......@@ -12,8 +12,6 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -93,10 +91,10 @@ def populate(kwargs):
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
if 'clear' in kwargs and kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
if 'delete_model' in kwargs and kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
......@@ -105,7 +103,8 @@ def populate(kwargs):
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
total_n = 0
if 'all' in kwargs and kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
......@@ -118,24 +117,29 @@ def populate(kwargs):
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
total_n +=n
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
total_n += n
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
total_n += n
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update)
total_n += n
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
total_n += n
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
......@@ -143,17 +147,21 @@ def populate(kwargs):
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
total_n += n
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
total_n += n
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
total_n += n
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
......@@ -161,18 +169,22 @@ def populate(kwargs):
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
total_n += n
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
total_n += n
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
total_n += n
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
......@@ -181,9 +193,11 @@ def populate(kwargs):
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
total_n += n
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
total_n += n
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
......@@ -191,7 +205,12 @@ def populate(kwargs):
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
total_n += n
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
total_n += n
print(f'Populated {n} DrDi associations from DrugBank.')
cache.clear()
return total_n
......@@ -15,7 +15,8 @@ class DataPopulator:
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
tissue,_ = models.Tissue.objects.get_or_create(name=tissue_name)
tissues_models[tissue_name] = tissue
proteins_linked = 0
bulk = set()
......
......@@ -3,7 +3,6 @@ import drugstone.models as models
class NodeCache:
proteins = dict()
entrez_to_uniprot = defaultdict(lambda: set())
gene_name_to_uniprot = defaultdict(lambda: set())
......@@ -14,8 +13,21 @@ class NodeCache:
disorder_updates = set()
protein_updates = set()
def clear(self):
self.proteins = dict()
self.entrez_to_uniprot = defaultdict(lambda: set())
self.gene_name_to_uniprot = defaultdict(lambda: set())
self.disorders = dict()
self.drugs = dict()
self.drug_updates = set()
self.disorder_updates = set()
self.protein_updates = set()
def init_protein_maps(self):
print("Generating protein id maps...")
self.entrez_to_uniprot = defaultdict(lambda: set())
self.gene_name_to_uniprot = defaultdict(lambda: set())
for protein in self.proteins.values():
self.entrez_to_uniprot[protein.entrez].add(protein.uniprot_code)
self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
......@@ -24,9 +36,6 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
......@@ -35,18 +44,12 @@ class NodeCache:
if len(self.drugs) == 0:
print("Generating drug map...")
for drug in models.Drug.objects.all():
if drug.id < 1000:
drug.delete()
continue
self.drugs[drug.drug_id] = drug
def init_disorders(self):
if len(self.disorders) == 0:
print("Generating disorder map...")
for disorder in models.Disorder.objects.all():
if disorder.id < 1000:
disorder.delete()
continue
self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein):
......
......@@ -6,13 +6,16 @@ from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = "/usr/src/drugstone/data"
@shared_task
def task_update_db_from_nedrex():
logger.info('Updating DB from NeDRex.')
logger.info('Updating data...')
populate({"all": True, "update": True})
n = populate({"all": True, "update": True, "data_dir": data_dir})
logger.info(f'Added {n} entries!')
if n > 0:
logger.info('Recreating networks...')
make_graphs()
logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
#if ! test -f "$file"; then
if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ file="store/docker-entrypoint.lock"
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
#fi
fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment