Skip to content
Snippets Groups Projects
Commit 71505df5 authored by AndiMajore's avatar AndiMajore
Browse files

added update and autoupdate functions

Former-commit-id: 1452720df605af5280396874a2c4f41a06b291ef [formerly c5ffaf4e6fd037c3fd9e0a0f6d71096e6037d754]
Former-commit-id: 0186ba3f7cd4e20538fb0625ac272cae9844a75d
parent 027a9b28
No related branches found
No related tags found
No related merge requests found
......@@ -107,7 +107,6 @@ class NedrexImporter:
proteins = with_entrez
iter_node_collection('gene', add_genes)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
......@@ -116,6 +115,8 @@ class NedrexImporter:
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
self.cache.proteins = proteins
......@@ -132,14 +133,16 @@ class NedrexImporter:
iter_node_collection('drug', add_drug)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(drugs, self.cache.drugs)
for u in updates:
u.save()
models.Drug.objects.bulk_create(creates)
for drug in creates:
self.cache.drug_updates.add(drug.drug_id)
self.cache.drugs[drug.drug_id] = drug
return len(creates)
else:
models.Drug.objects.bulk_create(drugs.values())
self.cache.drugs = drugs
......@@ -157,14 +160,15 @@ class NedrexImporter:
iter_node_collection('disorder', add_disorder)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(disorders, self.cache.disorders)
for u in updates:
u.save()
models.Disorder.objects.bulk_create(creates)
for disorder in creates:
self.cache.disorder_updates.add(disorder.mondo_id)
self.cache.disorders[disorder.mondo_id] = disorder
return len(creates)
else:
models.Disorder.objects.bulk_create(disorders.values())
self.cache.disorders = disorders
......@@ -175,18 +179,14 @@ class NedrexImporter:
self.cache.init_drugs()
self.cache.init_proteins()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
bulk = set()
def add_dpi(edge):
try:
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset,
drug=self.cache.get_drug_by_drugbank(
to_id(edge['sourceDomainId'])),
protein=self.cache.get_protein_by_uniprot(
to_id(edge['targetDomainId']))))
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
except KeyError:
pass
......@@ -197,9 +197,6 @@ class NedrexImporter:
def import_protein_protein_interactions(self, dataset, update):
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
bulk = list()
def iter_ppi(eval):
......@@ -216,11 +213,11 @@ class NedrexImporter:
def add_ppi(edge):
try:
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset,
from_protein=self.cache.get_protein_by_uniprot(
to_id(edge['memberOne'])),
to_protein=self.cache.get_protein_by_uniprot(
to_id(edge['memberTwo']))))
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_protein=protein2))
except KeyError:
pass
......@@ -232,18 +229,15 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_proteins()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
bulk = set()
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset,
protein=protein,
disorder=disorder, score=edge['score']))
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score']))
except KeyError:
pass
......@@ -255,18 +249,14 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_drugs()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
bulk = set()
def add_drdis(edge):
try:
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset,
drug=self.cache.get_drug_by_drugbank(
to_id(edge['sourceDomainId'])),
disorder=self.cache.get_disorder_by_mondo(
to_id(edge['targetDomainId']))))
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
except KeyError:
pass
......
......@@ -187,19 +187,22 @@ class Command(BaseCommand):
pass
def handle(self, *args, **kwargs):
ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets]
pdi_datasets = models.PDIDataset.objects.all()
pdi_datasets_names = [e.name for e in pdi_datasets]
parameter_combinations = []
for protein_interaction_dataset in ppi_datasets_names:
for pdi_dataset in pdi_datasets_names:
parameter_combinations.append((protein_interaction_dataset, pdi_dataset))
# close all database connections so subprocesses will create their own connections
# this prevents the processes from running into problems because of using the same connection
db.connections.close_all()
pool = multiprocessing.Pool(KERNEL)
pool.map(create_gt, parameter_combinations)
run()
def run():
    """Build ground-truth graphs for every PPI/PDI dataset combination.

    Collects the names of all PPI and PDI datasets, forms the cartesian
    product of the two name lists, and processes each (ppi, pdi) pair with
    ``create_gt`` in a multiprocessing pool of ``KERNEL`` workers.
    """
    ppi_dataset_names = [dataset.name for dataset in models.PPIDataset.objects.all()]
    pdi_dataset_names = [dataset.name for dataset in models.PDIDataset.objects.all()]
    parameter_combinations = [(ppi_name, pdi_name)
                              for ppi_name in ppi_dataset_names
                              for pdi_name in pdi_dataset_names]
    # Close all database connections so each subprocess opens its own;
    # sharing one connection across processes causes errors.
    db.connections.close_all()
    # Context manager guarantees the pool's workers are torn down even if
    # create_gt raises — the original never closed/joined the pool.
    with multiprocessing.Pool(KERNEL) as pool:
        pool.map(create_gt, parameter_combinations)
\ No newline at end of file
......@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -82,107 +84,114 @@ class Command(BaseCommand):
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
def handle(self, *args, **kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
kwargs['exp'] = True
kwargs['protein_protein'] = True
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator,update)
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
print('Importing PPIs from NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
populate(kwargs)
def populate(kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
kwargs['exp'] = True
kwargs['protein_protein'] = True
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update)
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
print('Importing PPIs from NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator,DatasetLoader.get_drug_target_chembl(), update)
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
print(f'Populated {n} DrDi associations from DrugBank.')
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
print(f'Populated {n} DrDi associations from DrugBank.')
......@@ -9,19 +9,13 @@ class DataPopulator:
self.cache = cache
def populate_expressions(self, update):
if update:
models.ExpressionLevel.objects.all().delete()
self.cache.init_proteins()
df = DataLoader.load_expressions()
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
try:
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
proteins_linked = 0
bulk = set()
......@@ -33,16 +27,16 @@ class DataPopulator:
for protein_model in self.cache.get_proteins_by_gene(gene_name):
proteins_linked += 1
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name])
id = hash(expr)
if id in uniq:
continue
uniq.add(id)
bulk.add(expr)
if not update or self.cache.is_new_protein(protein_model):
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name])
id = hash(expr)
if id in uniq:
continue
uniq.add(id)
bulk.add(expr)
if len(bulk) > 100000:
models.ExpressionLevel.objects.bulk_create(bulk)
size += len(bulk)
......@@ -59,8 +53,6 @@ class DataPopulator:
Returns:
int: Count of how many ensg-protein relations were added
"""
if update:
models.EnsemblGene.objects.all().delete()
self.cache.init_proteins()
data = DataLoader.load_ensg()
bulk = list()
......@@ -69,7 +61,8 @@ class DataPopulator:
proteins = self.cache.get_proteins_by_entrez(entrez)
for protein in proteins:
for ensg in ensg_list:
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
if not update or self.cache.is_new_protein(protein):
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk)
......@@ -81,8 +74,6 @@ class DataPopulator:
int: Count of how many interactions were added
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_string()
bulk = list()
......@@ -92,19 +83,15 @@ class DataPopulator:
proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
except KeyError:
# continue if not found
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -117,8 +104,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_apid()
bulk = set()
for _, row in df.iterrows():
......@@ -129,14 +114,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.add(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -149,8 +132,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_biogrid()
bulk = list()
for _, row in df.iterrows():
......@@ -164,15 +145,12 @@ class DataPopulator:
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -186,8 +164,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_chembl()
bulk = set()
for _, row in df.iterrows():
......@@ -202,11 +178,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -220,9 +197,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_disorders()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
df = DataLoader.load_pdis_disgenet()
bulk = set()
for _, row in df.iterrows():
......@@ -238,12 +212,13 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
disorder=disorder,
score=row['score']
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
disorder=disorder,
score=row['score']
))
models.ProteinDisorderAssociation.objects.bulk_create(bulk)
return len(bulk)
......@@ -256,8 +231,6 @@ class DataPopulator:
"""
self.cache.init_drugs()
self.cache.init_disorders()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
df = DataLoader.load_drdis_drugbank()
bulk = set()
......@@ -274,11 +247,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
disorder=disorder,
))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
disorder=disorder,
))
models.DrugDisorderIndication.objects.bulk_create(bulk)
return len(bulk)
......@@ -292,29 +266,24 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_dgidb()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -328,29 +297,23 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_drugbank()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -10,6 +10,10 @@ class NodeCache:
disorders = dict()
drugs = dict()
drug_updates = set()
disorder_updates = set()
protein_updates = set()
def init_protein_maps(self):
print("Generating protein id maps...")
for protein in self.proteins.values():
......@@ -20,23 +24,39 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
def init_drugs(self):
    """Lazily build the drug_id -> Drug map from the database.

    No-op when the map is already populated. Rows with a primary key
    below 1000 are deleted instead of cached (presumably legacy/seed
    rows — TODO confirm the intent of this threshold).
    """
    if self.drugs:
        return
    print("Generating drug map...")
    for drug in models.Drug.objects.all():
        if drug.id >= 1000:
            self.drugs[drug.drug_id] = drug
        else:
            drug.delete()
def init_disorders(self):
    """Lazily build the mondo_id -> Disorder map from the database.

    Does nothing when the map was built already. Rows with a primary key
    below 1000 are deleted instead of cached (presumably legacy/seed
    rows — TODO confirm the intent of this threshold).
    """
    if self.disorders:
        return
    print("Generating disorder map...")
    for disorder in models.Disorder.objects.all():
        if disorder.id >= 1000:
            self.disorders[disorder.mondo_id] = disorder
        else:
            disorder.delete()
def is_new_protein(self, protein:models.Protein):
    """Return True if *protein* was created during the current update run
    (its uniprot_code was recorded in ``protein_updates``)."""
    return protein.uniprot_code in self.protein_updates
def is_new_drug(self, drug:models.Drug):
    """Return True if *drug* was created during the current update run
    (its drug_id was recorded in ``drug_updates``)."""
    return drug.drug_id in self.drug_updates
def is_new_disease(self, disease:models.Disorder):
    """Return True if *disease* was created during the current update run
    (its mondo_id was recorded in ``disorder_updates``)."""
    return disease.mondo_id in self.disorder_updates
def get_protein_by_uniprot(self, uniprot_id):
    """Look up a cached Protein by its UniProt accession.

    Raises:
        KeyError: if the accession is not in the cache.
    """
    protein_map = self.proteins
    return protein_map[uniprot_id]
......
......@@ -84,6 +84,9 @@ class Protein(models.Model):
def __ne__(self, other):
    # Inequality defined as the negation of __eq__ so the two stay consistent.
    return not self.__eq__(other)
def __hash__(self):
    # Hash on the identifying fields so objects that compare equal hash
    # equally (required for correct behaviour in sets/dicts).
    return hash((self.uniprot_code, self.gene, self.entrez))
def update(self, other):
self.uniprot_code = other.uniprot_code
self.gene = other.gene
......
......@@ -3,6 +3,6 @@ from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = {
'update_db': {
'task': 'drugstone.tasks.task_update_db_from_nedrex',
'schedule': crontab(minute='*/1'),
'schedule': crontab(day_of_week=1, hour=5, minute=0),
},
}
from celery import shared_task
from celery.utils.log import get_task_logger
from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data
from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
@shared_task
def task_update_db_from_nedrex():
    """Scheduled Celery task: refresh the database from NeDRex and rebuild graphs.

    Runs a full populate in update mode (all entity and edge types), then
    recreates the precomputed network files.
    """
    logger.info('Updating DB from NeDRex.')
    logger.info('Fetching data...')
    # fetch_nedrex_data()
    logger.info('Integrating data...')
    # integrate_nedrex_data()
    logger.info('Updating data...')
    # populate() reads these keys unconditionally; pass explicit defaults so
    # the scheduled run does not die with a KeyError.
    populate({
        "all": True,
        "update": True,
        "data_dir": None,
        "clear": False,
        "delete_model": None,
    })
    logger.info('Recreating networks...')
    make_graphs()
    logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then
#if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
fi
#fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment