Skip to content
Snippets Groups Projects
Commit 3e5802e1 authored by AndiMajore
Browse files

fixed updating

parent 1602a919
No related branches found
No related tags found
No related merge requests found
Pipeline #12006 failed
...@@ -60,6 +60,8 @@ services: ...@@ -60,6 +60,8 @@ services:
image: drugstone_backend image: drugstone_backend
container_name: drugstone_celery container_name: drugstone_celery
hostname: drugstone_celery hostname: drugstone_celery
volumes:
- drugstone_data_volume:/usr/src/drugstone/data
env_file: env_file:
- './docker-django.env.dev' - './docker-django.env.dev'
depends_on: depends_on:
......
...@@ -112,10 +112,12 @@ class NedrexImporter: ...@@ -112,10 +112,12 @@ class NedrexImporter:
(updates, creates) = identify_updates(proteins, self.cache.proteins) (updates, creates) = identify_updates(proteins, self.cache.proteins)
for u in updates: for u in updates:
u.save() u.save()
self.cache.proteins[u.uniprot_code] = u
models.Protein.objects.bulk_create(creates) models.Protein.objects.bulk_create(creates)
for protein in creates: for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code) self.cache.protein_updates.add(protein.uniprot_code)
self.cache.init_protein_maps()
return len(creates) return len(creates)
else: else:
models.Protein.objects.bulk_create(proteins.values()) models.Protein.objects.bulk_create(proteins.values())
...@@ -180,13 +182,18 @@ class NedrexImporter: ...@@ -180,13 +182,18 @@ class NedrexImporter:
self.cache.init_proteins() self.cache.init_proteins()
bulk = set() bulk = set()
existing = set()
if update:
for edge in models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset):
existing.add(edge.__hash__())
def add_dpi(edge): def add_dpi(edge):
try: try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId'])) drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId'])) protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)): e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)) if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError: except KeyError:
pass pass
...@@ -198,6 +205,10 @@ class NedrexImporter: ...@@ -198,6 +205,10 @@ class NedrexImporter:
self.cache.init_proteins() self.cache.init_proteins()
bulk = list() bulk = list()
existing = set()
if update:
for edge in models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset):
existing.add(edge.__hash__())
def iter_ppi(eval): def iter_ppi(eval):
from python_nedrex import ppi from python_nedrex import ppi
...@@ -215,9 +226,9 @@ class NedrexImporter: ...@@ -215,9 +226,9 @@ class NedrexImporter:
try: try:
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne'])) protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo'])) protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)): e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,to_protein=protein2)
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, if not update or e.__hash__() not in existing:
to_protein=protein2)) bulk.append(e)
except KeyError: except KeyError:
pass pass
...@@ -230,14 +241,19 @@ class NedrexImporter: ...@@ -230,14 +241,19 @@ class NedrexImporter:
self.cache.init_proteins() self.cache.init_proteins()
bulk = set() bulk = set()
existing = set()
if update:
for edge in models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset):
existing.add(edge.__hash__())
def add_pdis(edge): def add_pdis(edge):
try: try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId'])) disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])): for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)): e = models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein, disorder=disorder,
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein, score=edge['score'])
disorder=disorder, score=edge['score'])) if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError: except KeyError:
pass pass
...@@ -250,13 +266,18 @@ class NedrexImporter: ...@@ -250,13 +266,18 @@ class NedrexImporter:
self.cache.init_drugs() self.cache.init_drugs()
bulk = set() bulk = set()
existing = set()
if update:
for edge in models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset):
existing.add(edge.__hash__())
def add_drdis(edge): def add_drdis(edge):
try: try:
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId'])) drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId'])) disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)): e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)) if not update or e.__hash__() not in existing:
bulk.add(e)
except KeyError: except KeyError:
pass pass
......
...@@ -12,8 +12,6 @@ from drugstone.management.includes.NodeCache import NodeCache ...@@ -12,8 +12,6 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader from drugstone.management.includes import DatasetLoader
class DatabasePopulator: class DatabasePopulator:
def __init__(self, data_dir): def __init__(self, data_dir):
self.data_dir = data_dir self.data_dir = data_dir
...@@ -93,10 +91,10 @@ def populate(kwargs): ...@@ -93,10 +91,10 @@ def populate(kwargs):
db_populator = DatabasePopulator(data_dir=data_dir) db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']: if 'clear' in kwargs and kwargs['clear']:
db_populator.delete_all() db_populator.delete_all()
if kwargs['delete_model'] is not None: if 'delete_model' in kwargs and kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',') model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list) db_populator.delete_models(model_list)
...@@ -105,7 +103,8 @@ def populate(kwargs): ...@@ -105,7 +103,8 @@ def populate(kwargs):
importer = NedrexImporter(nedrex_api_url, cache) importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache) populator = DataPopulator(cache)
if kwargs['all']: total_n = 0
if 'all' in kwargs and kwargs['all']:
kwargs['drugs'] = True kwargs['drugs'] = True
kwargs['disorders'] = True kwargs['disorders'] = True
kwargs['proteins'] = True kwargs['proteins'] = True
...@@ -118,24 +117,29 @@ def populate(kwargs): ...@@ -118,24 +117,29 @@ def populate(kwargs):
if kwargs['drugs']: if kwargs['drugs']:
print('Populating Drugs...') print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update) n = NedrexImporter.import_drugs(importer, update)
total_n +=n
print(f'Populated {n} Drugs.') print(f'Populated {n} Drugs.')
if kwargs['disorders']: if kwargs['disorders']:
print('Populating Disorders...') print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update) n = NedrexImporter.import_disorders(importer, update)
total_n += n
print(f'Populated {n} Disorders.') print(f'Populated {n} Disorders.')
if kwargs['proteins']: if kwargs['proteins']:
print('Populating Proteins...') print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update) n = NedrexImporter.import_proteins(importer, update)
total_n += n
print(f'Populated {n} Proteins.') print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...') print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update) n = DataPopulator.populate_ensg(populator, update)
total_n += n
print(f'Populated {n} ENSG IDs.') print(f'Populated {n} ENSG IDs.')
if kwargs['exp']: if kwargs['exp']:
print('Populating Expressions...') print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update) n = DataPopulator.populate_expressions(populator, update)
total_n += n
print(f'Populated {n} Expressions.') print(f'Populated {n} Expressions.')
if kwargs['protein_protein']: if kwargs['protein_protein']:
...@@ -143,17 +147,21 @@ def populate(kwargs): ...@@ -143,17 +147,21 @@ def populate(kwargs):
n = NedrexImporter.import_protein_protein_interactions(importer, n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url), DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update) update)
total_n += n
print(f'Imported {n} PPIs from NeDRexDB') print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...') print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update) n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
total_n += n
print(f'Populated {n} PPIs from STRING.') print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...') print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update) n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
total_n += n
print(f'Populated {n} PPIs from APID.') print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...') print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update) n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
total_n += n
print(f'Populated {n} PPIs from BioGRID.') print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']: if kwargs['protein_drug']:
...@@ -161,18 +169,22 @@ def populate(kwargs): ...@@ -161,18 +169,22 @@ def populate(kwargs):
n = NedrexImporter.import_drug_target_interactions(importer, n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url), DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update) update)
total_n += n
print(f'Imported {n} PDIs from NeDRexDB') print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...') print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update) n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
total_n += n
print(f'Populated {n} PDIs from Chembl.') print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...') print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update) n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
total_n += n
print(f'Populated {n} PDIs from DGIdb.') print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...') print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update) n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
total_n += n
print(f'Populated {n} PDIs from DrugBank.') print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']: if kwargs['protein_disorder']:
...@@ -181,9 +193,11 @@ def populate(kwargs): ...@@ -181,9 +193,11 @@ def populate(kwargs):
DatasetLoader.get_protein_disorder_nedrex( DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url), nedrex_api_url),
update) update)
total_n += n
print(f'Imported {n} PDis from NeDRexDB') print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...') print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update) n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
total_n += n
print(f'Populated {n} PDis associations from DisGeNET.') print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']: if kwargs['drug_disorder']:
...@@ -191,7 +205,12 @@ def populate(kwargs): ...@@ -191,7 +205,12 @@ def populate(kwargs):
n = NedrexImporter.import_drug_disorder_indications(importer, n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url), DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update) update)
total_n += n
print(f'Imported {n} DrDis from NeDRexDB') print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...') print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update) n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
total_n += n
print(f'Populated {n} DrDi associations from DrugBank.') print(f'Populated {n} DrDi associations from DrugBank.')
cache.clear()
return total_n
...@@ -15,7 +15,8 @@ class DataPopulator: ...@@ -15,7 +15,8 @@ class DataPopulator:
tissues_models = dict() tissues_models = dict()
for tissue_name in df.columns.values[2:]: for tissue_name in df.columns.values[2:]:
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name) tissue,_ = models.Tissue.objects.get_or_create(name=tissue_name)
tissues_models[tissue_name] = tissue
proteins_linked = 0 proteins_linked = 0
bulk = set() bulk = set()
......
...@@ -3,7 +3,6 @@ import drugstone.models as models ...@@ -3,7 +3,6 @@ import drugstone.models as models
class NodeCache: class NodeCache:
proteins = dict() proteins = dict()
entrez_to_uniprot = defaultdict(lambda: set()) entrez_to_uniprot = defaultdict(lambda: set())
gene_name_to_uniprot = defaultdict(lambda: set()) gene_name_to_uniprot = defaultdict(lambda: set())
...@@ -14,8 +13,21 @@ class NodeCache: ...@@ -14,8 +13,21 @@ class NodeCache:
disorder_updates = set() disorder_updates = set()
protein_updates = set() protein_updates = set()
def clear(self):
    """Reset the cache by rebinding every node map and update set to an empty container.

    After this call the instance holds empty ``proteins``, ``disorders`` and
    ``drugs`` dicts, empty set-valued default dicts for the two protein id
    maps, and empty ``*_updates`` sets.
    """
    # Node lookup tables.
    self.proteins = {}
    self.disorders = {}
    self.drugs = {}

    # Secondary protein id maps; a missing key yields a fresh empty set.
    self.entrez_to_uniprot = defaultdict(set)
    self.gene_name_to_uniprot = defaultdict(set)

    # Trackers for the ids touched during an update run.
    self.protein_updates = set()
    self.disorder_updates = set()
    self.drug_updates = set()
def init_protein_maps(self): def init_protein_maps(self):
print("Generating protein id maps...") print("Generating protein id maps...")
self.entrez_to_uniprot = defaultdict(lambda: set())
self.gene_name_to_uniprot = defaultdict(lambda: set())
for protein in self.proteins.values(): for protein in self.proteins.values():
self.entrez_to_uniprot[protein.entrez].add(protein.uniprot_code) self.entrez_to_uniprot[protein.entrez].add(protein.uniprot_code)
self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code) self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
...@@ -24,9 +36,6 @@ class NodeCache: ...@@ -24,9 +36,6 @@ class NodeCache:
if len(self.proteins) == 0: if len(self.proteins) == 0:
print("Generating protein maps...") print("Generating protein maps...")
for protein in models.Protein.objects.all(): for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0): if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps() self.init_protein_maps()
...@@ -35,18 +44,12 @@ class NodeCache: ...@@ -35,18 +44,12 @@ class NodeCache:
if len(self.drugs) == 0: if len(self.drugs) == 0:
print("Generating drug map...") print("Generating drug map...")
for drug in models.Drug.objects.all(): for drug in models.Drug.objects.all():
if drug.id < 1000:
drug.delete()
continue
self.drugs[drug.drug_id] = drug self.drugs[drug.drug_id] = drug
def init_disorders(self): def init_disorders(self):
if len(self.disorders) == 0: if len(self.disorders) == 0:
print("Generating disorder map...") print("Generating disorder map...")
for disorder in models.Disorder.objects.all(): for disorder in models.Disorder.objects.all():
if disorder.id < 1000:
disorder.delete()
continue
self.disorders[disorder.mondo_id] = disorder self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein): def is_new_protein(self, protein: models.Protein):
......
...@@ -6,13 +6,16 @@ from drugstone.management.commands.make_graphs import run as make_graphs ...@@ -6,13 +6,16 @@ from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/" nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = "/usr/src/drugstone/data"
@shared_task @shared_task
def task_update_db_from_nedrex(): def task_update_db_from_nedrex():
logger.info('Updating DB from NeDRex.') logger.info('Updating DB from NeDRex.')
logger.info('Updating data...') logger.info('Updating data...')
populate({"all": True, "update": True}) n = populate({"all": True, "update": True, "data_dir": data_dir})
logger.info(f'Added {n} entries!')
if n > 0:
logger.info('Recreating networks...') logger.info('Recreating networks...')
make_graphs() make_graphs()
logger.info('Done.') logger.info('Done.')
...@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock" ...@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
#if ! test -f "$file"; then if ! test -f "$file"; then
# sh scripts/import-data.sh # sh scripts/import-data.sh
python3 manage.py makemigrations drugstone python3 manage.py makemigrations drugstone
python3 manage.py migrate python3 manage.py migrate
...@@ -14,6 +14,6 @@ file="store/docker-entrypoint.lock" ...@@ -14,6 +14,6 @@ file="store/docker-entrypoint.lock"
python3 manage.py populate_db -u --all python3 manage.py populate_db -u --all
python3 manage.py make_graphs python3 manage.py make_graphs
touch $file touch $file
#fi fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment