Skip to content
Snippets Groups Projects
Commit 9c065a99 authored by AndiMajore's avatar AndiMajore
Browse files

added update and autoupdate functions

parent 002d409b
No related branches found
No related tags found
No related merge requests found
......@@ -107,7 +107,6 @@ class NedrexImporter:
proteins = with_entrez
iter_node_collection('gene', add_genes)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
......@@ -116,6 +115,8 @@ class NedrexImporter:
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
self.cache.proteins = proteins
......@@ -132,14 +133,16 @@ class NedrexImporter:
iter_node_collection('drug', add_drug)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(drugs, self.cache.drugs)
for u in updates:
u.save()
models.Drug.objects.bulk_create(creates)
for drug in creates:
self.cache.drug_updates.add(drug.drug_id)
self.cache.drugs[drug.drug_id] = drug
return len(creates)
else:
models.Drug.objects.bulk_create(drugs.values())
self.cache.drugs = drugs
......@@ -157,14 +160,15 @@ class NedrexImporter:
iter_node_collection('disorder', add_disorder)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(disorders, self.cache.disorders)
for u in updates:
u.save()
models.Disorder.objects.bulk_create(creates)
for disorder in creates:
self.cache.disorder_updates.add(disorder.mondo_id)
self.cache.disorders[disorder.mondo_id] = disorder
return len(creates)
else:
models.Disorder.objects.bulk_create(disorders.values())
self.cache.disorders = disorders
......@@ -175,18 +179,14 @@ class NedrexImporter:
self.cache.init_drugs()
self.cache.init_proteins()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
bulk = set()
def add_dpi(edge):
    """Queue one protein-drug interaction from a NeDRex edge record.

    Reads 'sourceDomainId' (drug) and 'targetDomainId' (protein) from the
    edge; unknown ids are silently skipped (KeyError from the cache lookup).
    In update mode only edges touching a newly created drug or protein are
    added, since interactions of pre-existing nodes are already stored.
    NOTE: this view contained both the removed and the added diff lines
    intermixed; this body is the reconstructed post-commit version.
    """
    try:
        drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
        protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
        if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
            bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
    except KeyError:
        # Drug or protein not present in the cache: skip this edge.
        pass
......@@ -197,9 +197,6 @@ class NedrexImporter:
def import_protein_protein_interactions(self, dataset, update):
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
bulk = list()
def iter_ppi(eval):
......@@ -216,11 +213,11 @@ class NedrexImporter:
def add_ppi(edge):
    """Queue one protein-protein interaction from a NeDRex edge record.

    Looks up both members ('memberOne'/'memberTwo') in the protein cache;
    missing uniprot ids are skipped via KeyError. In update mode only edges
    involving at least one newly created protein are queued.
    NOTE: this view contained both the removed and the added diff lines
    intermixed; this body is the reconstructed post-commit version.
    """
    try:
        protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
        protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
        if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
            bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
                                                         to_protein=protein2))
    except KeyError:
        # One of the members is not in the cache: skip this edge.
        pass
......@@ -232,17 +229,14 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_proteins()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
bulk = set()
def add_pdis(edge):
    """Queue protein-disorder associations for one NeDRex edge.

    One edge maps a disorder ('targetDomainId') to possibly several proteins
    resolved from the entrez id ('sourceDomainId'); the edge 'score' is kept.
    In update mode only pairs involving a newly created disorder or protein
    are queued. Unknown ids are skipped via KeyError.
    NOTE: this view contained both the removed and the added diff lines
    intermixed; this body is the reconstructed post-commit version.
    """
    try:
        disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
        for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
            if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
                bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
                                                           disorder=disorder, score=edge['score']))
    except KeyError:
        # Disorder or protein not present in the cache: skip this edge.
        pass
......@@ -255,18 +249,14 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_drugs()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
bulk = set()
def add_drdis(edge):
    """Queue one drug-disorder indication from a NeDRex edge record.

    Resolves the drug ('sourceDomainId') and disorder ('targetDomainId')
    through the cache; unknown ids are skipped via KeyError. In update mode
    only edges touching a newly created drug or disorder are queued.
    NOTE: this view contained both the removed and the added diff lines
    intermixed; this body is the reconstructed post-commit version.
    """
    try:
        drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
        disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
        if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
            bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
    except KeyError:
        # Drug or disorder not present in the cache: skip this edge.
        pass
......
......@@ -187,6 +187,9 @@ class Command(BaseCommand):
pass
def handle(self, *args, **kwargs):
    """Management-command entry point; all work is delegated to run()."""
    run()
def run():
ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets]
......
......@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -82,6 +84,10 @@ class Command(BaseCommand):
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
def handle(self, *args, **kwargs):
    """Management-command entry point; forwards the parsed CLI options to populate()."""
    populate(kwargs)
def populate(kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
......@@ -152,7 +158,9 @@ class Command(BaseCommand):
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
......@@ -170,7 +178,8 @@ class Command(BaseCommand):
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
......
......@@ -9,19 +9,13 @@ class DataPopulator:
self.cache = cache
def populate_expressions(self, update):
if update:
models.ExpressionLevel.objects.all().delete()
self.cache.init_proteins()
df = DataLoader.load_expressions()
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
try:
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
proteins_linked = 0
bulk = set()
......@@ -33,7 +27,7 @@ class DataPopulator:
for protein_model in self.cache.get_proteins_by_gene(gene_name):
proteins_linked += 1
if not update or self.cache.is_new_protein(protein_model):
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
......@@ -59,8 +53,6 @@ class DataPopulator:
Returns:
int: Count of how many ensg-protein relations were added
"""
if update:
models.EnsemblGene.objects.all().delete()
self.cache.init_proteins()
data = DataLoader.load_ensg()
bulk = list()
......@@ -69,6 +61,7 @@ class DataPopulator:
proteins = self.cache.get_proteins_by_entrez(entrez)
for protein in proteins:
for ensg in ensg_list:
if not update or self.cache.is_new_protein(protein):
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk)
......@@ -81,8 +74,6 @@ class DataPopulator:
int: Count of how many interactions were added
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_string()
bulk = list()
......@@ -92,19 +83,15 @@ class DataPopulator:
proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
except KeyError:
# continue if not found
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -117,8 +104,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_apid()
bulk = set()
for _, row in df.iterrows():
......@@ -129,14 +114,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.add(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -149,8 +132,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_biogrid()
bulk = list()
for _, row in df.iterrows():
......@@ -164,15 +145,12 @@ class DataPopulator:
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -186,8 +164,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_chembl()
bulk = set()
for _, row in df.iterrows():
......@@ -202,6 +178,7 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
......@@ -220,9 +197,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_disorders()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
df = DataLoader.load_pdis_disgenet()
bulk = set()
for _, row in df.iterrows():
......@@ -238,6 +212,7 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
......@@ -256,8 +231,6 @@ class DataPopulator:
"""
self.cache.init_drugs()
self.cache.init_disorders()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
df = DataLoader.load_drdis_drugbank()
bulk = set()
......@@ -274,6 +247,7 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
......@@ -292,24 +266,19 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_dgidb()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
......@@ -328,25 +297,19 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_drugbank()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
......
......@@ -10,6 +10,10 @@ class NodeCache:
disorders = dict()
drugs = dict()
drug_updates = set()
disorder_updates = set()
protein_updates = set()
def init_protein_maps(self):
print("Generating protein id maps...")
for protein in self.proteins.values():
......@@ -20,23 +24,39 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
def init_drugs(self):
    """Lazily build the drug_id -> Drug map from the database.

    Does nothing if the map is already populated. Rows with id < 1000 are
    deleted instead of cached — presumably placeholder/seed rows;
    NOTE(review): confirm the intent of this threshold.
    """
    if self.drugs:
        return
    print("Generating drug map...")
    for drug in models.Drug.objects.all():
        if drug.id < 1000:
            drug.delete()
        else:
            self.drugs[drug.drug_id] = drug
def init_disorders(self):
    """Lazily build the mondo_id -> Disorder map from the database.

    Does nothing if the map is already populated. Rows with id < 1000 are
    deleted instead of cached — presumably placeholder/seed rows;
    NOTE(review): confirm the intent of this threshold.
    """
    if self.disorders:
        return
    print("Generating disorder map...")
    for disorder in models.Disorder.objects.all():
        if disorder.id < 1000:
            disorder.delete()
        else:
            self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein):
    """Return True if this protein was newly created by the last import run."""
    created_ids = self.protein_updates
    return protein.uniprot_code in created_ids
def is_new_drug(self, drug: models.Drug):
    """Return True if this drug was newly created by the last import run."""
    created_ids = self.drug_updates
    return drug.drug_id in created_ids
def is_new_disease(self, disease: models.Disorder):
    """Return True if this disorder was newly created by the last import run."""
    created_ids = self.disorder_updates
    return disease.mondo_id in created_ids
def get_protein_by_uniprot(self, uniprot_id):
    """Look up a cached Protein by its UniProt accession.

    Raises KeyError when the accession is not in the cache.
    """
    protein_map = self.proteins
    return protein_map[uniprot_id]
......
......@@ -84,6 +84,9 @@ class Protein(models.Model):
def __ne__(self, other):
    """Define != as the negation of the custom __eq__ so both stay consistent."""
    equal = self.__eq__(other)
    return not equal
def __hash__(self):
    """Hash on the identifying triple so instances can live in sets/dict keys."""
    identity = (self.uniprot_code, self.gene, self.entrez)
    return hash(identity)
def update(self, other):
self.uniprot_code = other.uniprot_code
self.gene = other.gene
......
......@@ -3,6 +3,6 @@ from celery.schedules import crontab
# Periodic DB refresh from NeDRex: every Monday at 05:00.
# (The stale pre-commit entry scheduling it every minute was left in this
# view as a duplicate 'schedule' key and has been removed.)
CELERY_BEAT_SCHEDULE = {
    'update_db': {
        'task': 'drugstone.tasks.task_update_db_from_nedrex',
        'schedule': crontab(day_of_week=1, hour=5, minute=0),
    },
}
from celery import shared_task
from celery.utils.log import get_task_logger
from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data
from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
@shared_task
def task_update_db_from_nedrex():
    """Celery task: refresh the whole database from NeDRex and rebuild graphs.

    Runs populate() in update mode for all datasets, then regenerates the
    cached networks. Fetch/integrate steps are currently disabled.
    """
    logger.info('Updating DB from NeDRex.')
    logger.info('Fetching data...')
    # fetch_nedrex_data()  # NOTE(review): disabled — confirm before re-enabling
    logger.info('Integrating data...')
    # integrate_nedrex_data()  # NOTE(review): disabled — confirm before re-enabling
    logger.info('Updating data...')
    # NOTE(review): populate() may read other keys (e.g. 'data_dir') from this
    # dict — verify all required options are supplied.
    populate({"all": True, "update": True})
    logger.info('Recreating networks...')
    make_graphs()
    logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then
#if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
fi
#fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment