Commit 639c1d18 authored by AndiMajore

added update and autoupdate functions

Former-commit-id: 9c065a99
parent f8e7753c
@@ -107,7 +107,6 @@ class NedrexImporter:
            proteins = with_entrez
        iter_node_collection('gene', add_genes)
        # TODO test updating ideas
        if update:
            (updates, creates) = identify_updates(proteins, self.cache.proteins)
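identify_updates() itself is not part of this diff. Judging from the call sites, it compares the freshly fetched nodes against the cached ones and returns the rows to change and the rows to create. A minimal sketch of that contract, with names and behavior assumed from usage here rather than taken from the repository:

    def identify_updates(new_nodes, old_nodes):
        # Assumed helper: split fetched records into (updates, creates) against
        # the cached state. Keys are natural ids (uniprot_code, drug_id, mondo_id);
        # models are assumed to define __eq__ and update(other), as Protein does below.
        updates, creates = [], []
        for key, node in new_nodes.items():
            if key not in old_nodes:
                creates.append(node)
            elif node != old_nodes[key]:
                existing = old_nodes[key]
                existing.update(node)  # copy the changed fields onto the existing row
                updates.append(existing)
        return updates, creates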
@@ -116,6 +115,8 @@ class NedrexImporter:
            models.Protein.objects.bulk_create(creates)
            for protein in creates:
                self.cache.proteins[protein.uniprot_code] = protein
+               self.cache.protein_updates.add(protein.uniprot_code)
+           return len(creates)
        else:
            models.Protein.objects.bulk_create(proteins.values())
            self.cache.proteins = proteins
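Note that bulk_create() inserts all rows in one query and neither calls Model.save() nor fires pre_save/post_save signals, which is why the cache is refreshed by hand right after it.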
@@ -132,14 +133,16 @@ class NedrexImporter:
        iter_node_collection('drug', add_drug)
        # TODO test updating ideas
        if update:
            (updates, creates) = identify_updates(drugs, self.cache.drugs)
            for u in updates:
                u.save()
            models.Drug.objects.bulk_create(creates)
            for drug in creates:
+               self.cache.drug_updates.add(drug.drug_id)
                self.cache.drugs[drug.drug_id] = drug
+           return len(creates)
        else:
            models.Drug.objects.bulk_create(drugs.values())
            self.cache.drugs = drugs
@@ -157,14 +160,15 @@ class NedrexImporter:
        iter_node_collection('disorder', add_disorder)
        # TODO test updating ideas
        if update:
            (updates, creates) = identify_updates(disorders, self.cache.disorders)
            for u in updates:
                u.save()
            models.Disorder.objects.bulk_create(creates)
            for disorder in creates:
+               self.cache.disorder_updates.add(disorder.mondo_id)
                self.cache.disorders[disorder.mondo_id] = disorder
+           return len(creates)
        else:
            models.Disorder.objects.bulk_create(disorders.values())
            self.cache.disorders = disorders
@@ -175,18 +179,14 @@ class NedrexImporter:
        self.cache.init_drugs()
        self.cache.init_proteins()
-       if update:
-           models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
        bulk = set()

        def add_dpi(edge):
            try:
-               bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset,
-                                                      drug=self.cache.get_drug_by_drugbank(
-                                                          to_id(edge['sourceDomainId'])),
-                                                      protein=self.cache.get_protein_by_uniprot(
-                                                          to_id(edge['targetDomainId']))))
+               drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
+               protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
+               if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
+                   bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
            except KeyError:
                pass
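This is the pattern every edge importer below follows: in a full import (update is False) every parsed edge is kept, while in update mode the old dataset-wide delete() is dropped and only edges touching at least one newly created node are added, on the assumption that edges between pre-existing nodes are already in the database.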
@@ -197,9 +197,6 @@ class NedrexImporter:
    def import_protein_protein_interactions(self, dataset, update):
        self.cache.init_proteins()
-       if update:
-           models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
        bulk = list()

        def iter_ppi(eval):
@@ -216,11 +213,11 @@ class NedrexImporter:
        def add_ppi(edge):
            try:
-               bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset,
-                                                            from_protein=self.cache.get_protein_by_uniprot(
-                                                                to_id(edge['memberOne'])),
-                                                            to_protein=self.cache.get_protein_by_uniprot(
-                                                                to_id(edge['memberTwo']))))
+               protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
+               protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
+               if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
+                   bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
+                                                                to_protein=protein2))
            except KeyError:
                pass
@@ -232,17 +229,14 @@ class NedrexImporter:
        self.cache.init_disorders()
        self.cache.init_proteins()
-       if update:
-           models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
        bulk = set()

        def add_pdis(edge):
            try:
                disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
                for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
-                   bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset,
-                                                              protein=protein,
-                                                              disorder=disorder, score=edge['score']))
+                   if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
+                       bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
+                                                                  disorder=disorder, score=edge['score']))
            except KeyError:
                pass
@@ -255,18 +249,14 @@ class NedrexImporter:
        self.cache.init_disorders()
        self.cache.init_drugs()
-       if update:
-           models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
        bulk = set()

        def add_drdis(edge):
            try:
-               bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset,
-                                                      drug=self.cache.get_drug_by_drugbank(
-                                                          to_id(edge['sourceDomainId'])),
-                                                      disorder=self.cache.get_disorder_by_mondo(
-                                                          to_id(edge['targetDomainId']))))
+               drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
+               disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
+               if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
+                   bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
            except KeyError:
                pass
@@ -187,6 +187,9 @@ class Command(BaseCommand):
        pass

    def handle(self, *args, **kwargs):
+       run()

+def run():
    ppi_datasets = models.PPIDataset.objects.all()
    ppi_datasets_names = [e.name for e in ppi_datasets]
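Moving the body of handle() into a module-level run() lets other code, in particular the Celery task below, trigger graph generation directly instead of going through manage.py; handle() stays as a thin wrapper.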
@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader

class DatabasePopulator:
    def __init__(self, data_dir):
        self.data_dir = data_dir
@@ -82,6 +84,10 @@ class Command(BaseCommand):
        parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')

    def handle(self, *args, **kwargs):
+       populate(kwargs)

+def populate(kwargs):
    nedrex_api_url = "http://82.148.225.92:8123/"
    data_dir = kwargs['data_dir']
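The same extraction as in make_graphs: populate() takes a plain kwargs dict, so the scheduled task can call populate({'all': True, 'update': True}) without going through the argparse layer.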
@@ -152,7 +158,9 @@ class Command(BaseCommand):
    if kwargs['protein_drug']:
        print('Importing PDIs from NeDRexDB...')
-       n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
+       n = NedrexImporter.import_drug_target_interactions(importer,
+                                                          DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
+                                                          update)
        print(f'Imported {n} PDIs from NeDRexDB')
        print('Populating PDIs from Chembl...')
@@ -170,7 +178,8 @@ class Command(BaseCommand):
    if kwargs['protein_disorder']:
        print('Importing PDis from NeDRexDB...')
        n = NedrexImporter.import_protein_disorder_associations(importer,
-                                                               DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
+                                                               DatasetLoader.get_protein_disorder_nedrex(
+                                                                   nedrex_api_url),
                                                                update)
        print(f'Imported {n} PDis from NeDRexDB')
        print('Populating PDis associations from DisGeNET...')
@@ -9,19 +9,13 @@ class DataPopulator:
        self.cache = cache

    def populate_expressions(self, update):
-       if update:
-           models.ExpressionLevel.objects.all().delete()
        self.cache.init_proteins()
        df = DataLoader.load_expressions()
        tissues_models = dict()
        for tissue_name in df.columns.values[2:]:
-           try:
-               tissue_model = models.Tissue.objects.get(name=tissue_name)
-           except models.Tissue.DoesNotExist:
-               tissue_model = models.Tissue.objects.create(name=tissue_name)
-           tissues_models[tissue_name] = tissue_model
+           # get_or_create() returns an (obj, created) tuple, so keep only the instance
+           tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)[0]
        proteins_linked = 0
        bulk = set()
@@ -33,7 +27,7 @@ class DataPopulator:
            for protein_model in self.cache.get_proteins_by_gene(gene_name):
                proteins_linked += 1
+               if not update or self.cache.is_new_protein(protein_model):
                    for tissue_name, tissue_model in tissues_models.items():
                        expr = models.ExpressionLevel(protein=protein_model,
                                                      tissue=tissue_model,
@@ -59,8 +53,6 @@ class DataPopulator:
        Returns:
            int: Count of how many ensg-protein relations were added
        """
-       if update:
-           models.EnsemblGene.objects.all().delete()
        self.cache.init_proteins()
        data = DataLoader.load_ensg()
        bulk = list()
@@ -69,6 +61,7 @@ class DataPopulator:
            proteins = self.cache.get_proteins_by_entrez(entrez)
            for protein in proteins:
                for ensg in ensg_list:
+                   if not update or self.cache.is_new_protein(protein):
                        bulk.append(models.EnsemblGene(name=ensg, protein=protein))
        models.EnsemblGene.objects.bulk_create(bulk)
        return len(bulk)
@@ -81,8 +74,6 @@ class DataPopulator:
            int: Count of how many interactions were added
        """
        self.cache.init_proteins()
-       if update:
-           models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
        df = DataLoader.load_ppi_string()
        bulk = list()
@@ -92,19 +83,15 @@ class DataPopulator:
                proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
                proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
            except KeyError:
                # continue if not found
                continue
            for protein_a in proteins_a:
                for protein_b in proteins_b:
-                   try:
+                   if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
                        bulk.append(models.ProteinProteinInteraction(
                            ppi_dataset=dataset,
                            from_protein=protein_a,
                            to_protein=protein_b
                        ))
-                   except models.ValidationError:
-                       # duplicate
-                       continue
        models.ProteinProteinInteraction.objects.bulk_create(bulk)
        return len(bulk)
@@ -117,8 +104,6 @@ class DataPopulator:
        """
        self.cache.init_proteins()
-       if update:
-           models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
        df = DataLoader.load_ppi_apid()
        bulk = set()
        for _, row in df.iterrows():
@@ -129,14 +114,12 @@ class DataPopulator:
            except KeyError:
                # continue if not found
                continue
-           try:
+           if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
                bulk.add(models.ProteinProteinInteraction(
                    ppi_dataset=dataset,
                    from_protein=protein_a,
                    to_protein=protein_b
                ))
-           except models.ValidationError:
-               continue
        models.ProteinProteinInteraction.objects.bulk_create(bulk)
        return len(bulk)
@@ -149,8 +132,6 @@ class DataPopulator:
        """
        self.cache.init_proteins()
-       if update:
-           models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
        df = DataLoader.load_ppi_biogrid()
        bulk = list()
        for _, row in df.iterrows():
@@ -164,15 +145,12 @@ class DataPopulator:
                continue
            for protein_a in proteins_a:
                for protein_b in proteins_b:
-                   try:
+                   if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
                        bulk.append(models.ProteinProteinInteraction(
                            ppi_dataset=dataset,
                            from_protein=protein_a,
                            to_protein=protein_b
                        ))
-                   except models.ValidationError:
-                       # duplicate
-                       continue
        models.ProteinProteinInteraction.objects.bulk_create(bulk)
        return len(bulk)
@@ -186,8 +164,6 @@ class DataPopulator:
        """
        self.cache.init_proteins()
        self.cache.init_drugs()
-       if update:
-           models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
        df = DataLoader.load_pdi_chembl()
        bulk = set()
        for _, row in df.iterrows():
@@ -202,6 +178,7 @@ class DataPopulator:
            except KeyError:
                # continue if not found
                continue
+           if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
                bulk.add(models.ProteinDrugInteraction(
                    pdi_dataset=dataset,
                    protein=protein,
@@ -220,9 +197,6 @@ class DataPopulator:
        self.cache.init_proteins()
        self.cache.init_disorders()
-       if update:
-           models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
        df = DataLoader.load_pdis_disgenet()
        bulk = set()
        for _, row in df.iterrows():
@@ -238,6 +212,7 @@ class DataPopulator:
            except KeyError:
                # continue if not found
                continue
+           if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
                bulk.add(models.ProteinDisorderAssociation(
                    pdis_dataset=dataset,
                    protein=protein,
@@ -256,8 +231,6 @@ class DataPopulator:
        """
        self.cache.init_drugs()
        self.cache.init_disorders()
-       if update:
-           models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
        df = DataLoader.load_drdis_drugbank()
        bulk = set()
@@ -274,6 +247,7 @@ class DataPopulator:
            except KeyError:
                # continue if not found
                continue
+           if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
                bulk.add(models.DrugDisorderIndication(
                    drdi_dataset=dataset,
                    drug=drug,
@@ -292,24 +266,19 @@ class DataPopulator:
        self.cache.init_proteins()
        self.cache.init_drugs()
-       if update:
-           models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
        df = DataLoader.load_pdi_dgidb()
        bulk = set()
        for _, row in df.iterrows():
            try:
                # try fetching protein
                proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
            except KeyError:
                # continue if not found
                continue
            try:
                # try fetching drug
                drug = self.cache.get_drug_by_drugbank(row['drug_id'])
            except KeyError:
                # continue if not found
                continue
            for protein in proteins:
+               if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
                    bulk.add(models.ProteinDrugInteraction(
                        pdi_dataset=dataset,
                        protein=protein,
@@ -328,25 +297,19 @@ class DataPopulator:
        self.cache.init_proteins()
        self.cache.init_drugs()
-       if update:
-           models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
        df = DataLoader.load_pdi_drugbank()
        bulk = set()
        for _, row in df.iterrows():
            try:
                # try fetching protein
                proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
            except KeyError:
                # continue if not found
                continue
            try:
                # try fetching drug
                drug = self.cache.get_drug_by_drugbank(row['drug_id'])
            except KeyError:
                # continue if not found
                continue
            for protein in proteins:
+               if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
                    bulk.add(models.ProteinDrugInteraction(
                        pdi_dataset=dataset,
                        protein=protein,
@@ -10,6 +10,10 @@ class NodeCache:
    disorders = dict()
    drugs = dict()

+   drug_updates = set()
+   disorder_updates = set()
+   protein_updates = set()

    def init_protein_maps(self):
        print("Generating protein id maps...")
        for protein in self.proteins.values():
@@ -20,23 +24,39 @@ class NodeCache:
        if len(self.proteins) == 0:
            print("Generating protein maps...")
            for protein in models.Protein.objects.all():
+               if protein.id < 1000:
+                   protein.delete()
+                   continue
                self.proteins[protein.uniprot_code] = protein
        if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
            self.init_protein_maps()

    def init_drugs(self):
        if len(self.drugs) == 0:
            print("Generating drug map...")
            for drug in models.Drug.objects.all():
+               if drug.id < 1000:
+                   drug.delete()
+                   continue
                self.drugs[drug.drug_id] = drug

    def init_disorders(self):
        if len(self.disorders) == 0:
            print("Generating disorder map...")
            for disorder in models.Disorder.objects.all():
+               if disorder.id < 1000:
+                   disorder.delete()
+                   continue
                self.disorders[disorder.mondo_id] = disorder

+   def is_new_protein(self, protein: models.Protein):
+       return protein.uniprot_code in self.protein_updates
+
+   def is_new_drug(self, drug: models.Drug):
+       return drug.drug_id in self.drug_updates
+
+   def is_new_disease(self, disease: models.Disorder):
+       return disease.mondo_id in self.disorder_updates

    def get_protein_by_uniprot(self, uniprot_id):
        return self.proteins[uniprot_id]
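Taken together, the update bookkeeping works like this (a sketch; 'DB00001' and 'P12345' are placeholder ids, not values from the diff):

    cache = NodeCache()
    cache.init_drugs()
    cache.init_proteins()
    # node importers record the natural key of every row they create:
    cache.drug_updates.add('DB00001')
    # edge importers later keep only edges that touch something new:
    drug = cache.get_drug_by_drugbank('DB00001')
    protein = cache.get_protein_by_uniprot('P12345')
    if cache.is_new_drug(drug) or cache.is_new_protein(protein):
        pass  # create the interaction

Note that drug_updates, disorder_updates and protein_updates are class attributes, so they are shared by every NodeCache instance in the process.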
@@ -84,6 +84,9 @@ class Protein(models.Model):
    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.uniprot_code, self.gene, self.entrez))

+   def update(self, other):
+       self.uniprot_code = other.uniprot_code
+       self.gene = other.gene
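The hunk is truncated here, but together with __eq__/__ne__ and the tuple-based __hash__ above, update(other) gives the identify_updates flow what it needs: equality detects changed rows, and update() copies the new values onto the existing instance before save().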
@@ -3,6 +3,6 @@ from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = {
    'update_db': {
        'task': 'drugstone.tasks.task_update_db_from_nedrex',
-       'schedule': crontab(minute='*/1'),
+       'schedule': crontab(day_of_week=1, hour=5, minute=0),
    },
}
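In Celery's crontab, day_of_week=1 is Monday, so the auto-update schedule moves from every minute, which only makes sense for debugging, to once a week on Monday at 05:00.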
from celery import shared_task
from celery.utils.log import get_task_logger

from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data
from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs

logger = get_task_logger(__name__)

nedrex_api_url = "http://82.148.225.92:8123/"

@shared_task
def task_update_db_from_nedrex():
    logger.info('Updating DB from NeDRex.')
    logger.info('Fetching data...')
    # fetch_nedrex_data()
    logger.info('Integrating data...')
    # integrate_nedrex_data()
    logger.info('Updating data...')
    populate({"all": True, "update": True})
    logger.info('Recreating networks...')
    make_graphs()
    logger.info('Done.')
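The task only fires if a beat scheduler runs alongside the worker; assuming the Celery app lives in the drugstone module, something like:

    celery -A drugstone worker --loglevel=info
    celery -A drugstone beat --loglevel=info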
@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
-if ! test -f "$file"; then
+#if ! test -f "$file"; then
# sh scripts/import-data.sh

python3 manage.py makemigrations drugstone
python3 manage.py migrate
@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
-fi
+#fi

/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
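With the lock-file guard commented out, migrations, populate_db -u --all and make_graphs now run on every container start rather than only the first; touch $file still creates the lock file, but nothing checks it anymore.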