Skip to content
Snippets Groups Projects
Commit 1602a919 authored by AndiMajore's avatar AndiMajore
Browse files

added update and autoupdate functions

parent c54d91df
No related branches found
No related tags found
No related merge requests found
Pipeline #12005 failed
......@@ -107,7 +107,6 @@ class NedrexImporter:
proteins = with_entrez
iter_node_collection('gene', add_genes)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.cache.proteins)
......@@ -116,6 +115,8 @@ class NedrexImporter:
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
return len(creates)
else:
models.Protein.objects.bulk_create(proteins.values())
self.cache.proteins = proteins
......@@ -132,14 +133,16 @@ class NedrexImporter:
iter_node_collection('drug', add_drug)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(drugs, self.cache.drugs)
for u in updates:
u.save()
models.Drug.objects.bulk_create(creates)
for drug in creates:
self.cache.drug_updates.add(drug.drug_id)
self.cache.drugs[drug.drug_id] = drug
return len(creates)
else:
models.Drug.objects.bulk_create(drugs.values())
self.cache.drugs = drugs
......@@ -157,14 +160,15 @@ class NedrexImporter:
iter_node_collection('disorder', add_disorder)
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(disorders, self.cache.disorders)
for u in updates:
u.save()
models.Disorder.objects.bulk_create(creates)
for disorder in creates:
self.cache.disorder_updates.add(disorder.mondo_id)
self.cache.disorders[disorder.mondo_id] = disorder
return len(creates)
else:
models.Disorder.objects.bulk_create(disorders.values())
self.cache.disorders = disorders
......@@ -175,18 +179,14 @@ class NedrexImporter:
self.cache.init_drugs()
self.cache.init_proteins()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
bulk = set()
def add_dpi(edge):
try:
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset,
drug=self.cache.get_drug_by_drugbank(
to_id(edge['sourceDomainId'])),
protein=self.cache.get_protein_by_uniprot(
to_id(edge['targetDomainId']))))
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
except KeyError:
pass
......@@ -197,9 +197,6 @@ class NedrexImporter:
def import_protein_protein_interactions(self, dataset, update):
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
bulk = list()
def iter_ppi(eval):
......@@ -216,11 +213,11 @@ class NedrexImporter:
def add_ppi(edge):
try:
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset,
from_protein=self.cache.get_protein_by_uniprot(
to_id(edge['memberOne'])),
to_protein=self.cache.get_protein_by_uniprot(
to_id(edge['memberTwo']))))
protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_protein=protein2))
except KeyError:
pass
......@@ -232,18 +229,15 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_proteins()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
bulk = set()
def add_pdis(edge):
try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset,
protein=protein,
disorder=disorder, score=edge['score']))
if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score']))
except KeyError:
pass
......@@ -255,18 +249,14 @@ class NedrexImporter:
self.cache.init_disorders()
self.cache.init_drugs()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
bulk = set()
def add_drdis(edge):
try:
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset,
drug=self.cache.get_drug_by_drugbank(
to_id(edge['sourceDomainId'])),
disorder=self.cache.get_disorder_by_mondo(
to_id(edge['targetDomainId']))))
drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
except KeyError:
pass
......
......@@ -187,19 +187,22 @@ class Command(BaseCommand):
pass
def handle(self, *args, **kwargs):
    """Entry point of the `make_graphs` management command.

    The graph-building logic lives in the module-level run() so it can also
    be invoked outside the management command (e.g. from scheduled tasks);
    this method previously duplicated that body verbatim.
    """
    run()
def run():
    """Precompute graphs for every PPI/PDI dataset combination in parallel."""
    ppi_names = [dataset.name for dataset in models.PPIDataset.objects.all()]
    pdi_names = [dataset.name for dataset in models.PDIDataset.objects.all()]
    combinations = [(ppi_name, pdi_name)
                    for ppi_name in ppi_names
                    for pdi_name in pdi_names]
    # Close all database connections so the worker subprocesses create their
    # own; sharing one connection across processes leads to errors.
    db.connections.close_all()
    worker_pool = multiprocessing.Pool(KERNEL)
    worker_pool.map(create_gt, combinations)
\ No newline at end of file
......@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
class DatabasePopulator:
def __init__(self, data_dir):
self.data_dir = data_dir
......@@ -82,107 +84,114 @@ class Command(BaseCommand):
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
def handle(self, *args, **kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
kwargs['exp'] = True
kwargs['protein_protein'] = True
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator,update)
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
print('Importing PPIs from NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
populate(kwargs)
def populate(kwargs):
nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if kwargs['clear']:
db_populator.delete_all()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url, cache)
populator = DataPopulator(cache)
if kwargs['all']:
kwargs['drugs'] = True
kwargs['disorders'] = True
kwargs['proteins'] = True
kwargs['exp'] = True
kwargs['protein_protein'] = True
kwargs['protein_drug'] = True
kwargs['protein_disorder'] = True
kwargs['drug_disorder'] = True
if kwargs['drugs']:
print('Populating Drugs...')
n = NedrexImporter.import_drugs(importer, update)
print(f'Populated {n} Drugs.')
if kwargs['disorders']:
print('Populating Disorders...')
n = NedrexImporter.import_disorders(importer, update)
print(f'Populated {n} Disorders.')
if kwargs['proteins']:
print('Populating Proteins...')
n = NedrexImporter.import_proteins(importer, update)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator, update)
print(f'Populated {n} ENSG IDs.')
if kwargs['exp']:
print('Populating Expressions...')
n = DataPopulator.populate_expressions(populator, update)
print(f'Populated {n} Expressions.')
if kwargs['protein_protein']:
print('Importing PPIs from NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator,DatasetLoader.get_drug_target_chembl(), update)
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
print(f'Populated {n} DrDi associations from DrugBank.')
print(f'Imported {n} PPIs from NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid(populator, DatasetLoader.get_ppi_apid(), update)
print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid(populator, DatasetLoader.get_ppi_biogrid(), update)
print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb(populator, DatasetLoader.get_drug_target_dgidb(), update)
print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank(populator, DatasetLoader.get_drug_target_drugbank(), update)
print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update)
print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...')
n = DataPopulator.populate_pdis_disgenet(populator, DatasetLoader.get_disorder_protein_disgenet(), update)
print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder']:
print('Importing DrDis from NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url),
update)
print(f'Imported {n} DrDis from NeDRexDB')
print('Populating DrDi indications from DrugBank...')
n = DataPopulator.populate_drdis_drugbank(populator, DatasetLoader.get_drug_disorder_drugbank(), update)
print(f'Populated {n} DrDi associations from DrugBank.')
......@@ -9,19 +9,13 @@ class DataPopulator:
self.cache = cache
def populate_expressions(self, update):
if update:
models.ExpressionLevel.objects.all().delete()
self.cache.init_proteins()
df = DataLoader.load_expressions()
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
try:
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
proteins_linked = 0
bulk = set()
......@@ -33,16 +27,16 @@ class DataPopulator:
for protein_model in self.cache.get_proteins_by_gene(gene_name):
proteins_linked += 1
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name])
id = hash(expr)
if id in uniq:
continue
uniq.add(id)
bulk.add(expr)
if not update or self.cache.is_new_protein(protein_model):
for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name])
id = hash(expr)
if id in uniq:
continue
uniq.add(id)
bulk.add(expr)
if len(bulk) > 100000:
models.ExpressionLevel.objects.bulk_create(bulk)
size += len(bulk)
......@@ -59,8 +53,6 @@ class DataPopulator:
Returns:
int: Count of how many ensg-protein relations were added
"""
if update:
models.EnsemblGene.objects.all().delete()
self.cache.init_proteins()
data = DataLoader.load_ensg()
bulk = list()
......@@ -69,7 +61,8 @@ class DataPopulator:
proteins = self.cache.get_proteins_by_entrez(entrez)
for protein in proteins:
for ensg in ensg_list:
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
if not update or self.cache.is_new_protein(protein):
bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk)
......@@ -81,8 +74,6 @@ class DataPopulator:
int: Count of how many interactions were added
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_string()
bulk = list()
......@@ -92,19 +83,15 @@ class DataPopulator:
proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
except KeyError:
# continue if not found
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -117,8 +104,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_apid()
bulk = set()
for _, row in df.iterrows():
......@@ -129,14 +114,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.add(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -149,8 +132,6 @@ class DataPopulator:
"""
self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_biogrid()
bulk = list()
for _, row in df.iterrows():
......@@ -164,15 +145,12 @@ class DataPopulator:
continue
for protein_a in proteins_a:
for protein_b in proteins_b:
try:
if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset,
from_protein=protein_a,
to_protein=protein_b
))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -186,8 +164,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_chembl()
bulk = set()
for _, row in df.iterrows():
......@@ -202,11 +178,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -220,9 +197,6 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_disorders()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
df = DataLoader.load_pdis_disgenet()
bulk = set()
for _, row in df.iterrows():
......@@ -238,12 +212,13 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
disorder=disorder,
score=row['score']
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset,
protein=protein,
disorder=disorder,
score=row['score']
))
models.ProteinDisorderAssociation.objects.bulk_create(bulk)
return len(bulk)
......@@ -256,8 +231,6 @@ class DataPopulator:
"""
self.cache.init_drugs()
self.cache.init_disorders()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
df = DataLoader.load_drdis_drugbank()
bulk = set()
......@@ -274,11 +247,12 @@ class DataPopulator:
except KeyError:
# continue if not found
continue
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
disorder=disorder,
))
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset,
drug=drug,
disorder=disorder,
))
models.DrugDisorderIndication.objects.bulk_create(bulk)
return len(bulk)
......@@ -292,29 +266,24 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_dgidb()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -328,29 +297,23 @@ class DataPopulator:
self.cache.init_proteins()
self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_drugbank()
bulk = set()
for _, row in df.iterrows():
try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError:
# continue if not found
continue
try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError:
# continue if not found
continue
for protein in proteins:
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset,
protein=protein,
drug=drug
))
models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk)
......@@ -10,6 +10,10 @@ class NodeCache:
disorders = dict()
drugs = dict()
drug_updates = set()
disorder_updates = set()
protein_updates = set()
def init_protein_maps(self):
print("Generating protein id maps...")
for protein in self.proteins.values():
......@@ -20,23 +24,39 @@ class NodeCache:
if len(self.proteins) == 0:
print("Generating protein maps...")
for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps()
def init_drugs(self):
if len(self.drugs) == 0:
print("Generating drug map...")
for drug in models.Drug.objects.all():
if drug.id < 1000:
drug.delete()
continue
self.drugs[drug.drug_id] = drug
def init_disorders(self):
if len(self.disorders) == 0:
print("Generating disorder map...")
for disorder in models.Disorder.objects.all():
if disorder.id < 1000:
disorder.delete()
continue
self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein):
    """True when *protein* was newly created during the current update run."""
    return protein.uniprot_code in self.protein_updates
def is_new_drug(self, drug: models.Drug):
    """True when *drug* was newly created during the current update run."""
    return drug.drug_id in self.drug_updates
def is_new_disease(self, disease: models.Disorder):
    """True when *disease* was newly created during the current update run."""
    return disease.mondo_id in self.disorder_updates
def get_protein_by_uniprot(self,uniprot_id):
return self.proteins[uniprot_id]
......
......@@ -84,6 +84,9 @@ class Protein(models.Model):
def __ne__(self, other):
    """Define != as the exact inverse of __eq__ to keep comparisons consistent."""
    return not self.__eq__(other)
def __hash__(self):
    """Hash over the identifying fields so proteins can live in sets/dict keys."""
    identity = (self.uniprot_code, self.gene, self.entrez)
    return hash(identity)
def update(self, other):
self.uniprot_code = other.uniprot_code
self.gene = other.gene
......
......@@ -3,6 +3,6 @@ from celery.schedules import crontab
# Periodic Celery beat schedule: run the NeDRex DB update weekly,
# Mondays at 05:00 (the previous every-minute value was for debugging only).
CELERY_BEAT_SCHEDULE = {
    'update_db': {
        'task': 'drugstone.tasks.task_update_db_from_nedrex',
        'schedule': crontab(day_of_week=1, hour=5, minute=0),
    },
}
from celery import shared_task
from celery.utils.log import get_task_logger
from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data
from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
# Celery task logger bound to this module's name.
logger = get_task_logger(__name__)
# NeDRex API endpoint used as the update data source.
# NOTE(review): hard-coded IP — presumably belongs in settings; confirm.
nedrex_api_url = "http://82.148.225.92:8123/"
@shared_task
def task_update_db_from_nedrex():
    """Scheduled task: refresh the local database from NeDRex and rebuild graphs.

    Mirrors `manage.py populate_db --all -u` followed by `manage.py make_graphs`.
    """
    logger.info('Updating DB from NeDRex.')
    logger.info('Fetching data...')
    # fetch_nedrex_data()
    logger.info('Integrating data...')
    # integrate_nedrex_data()
    logger.info('Updating data...')
    # populate() unconditionally indexes 'data_dir', 'clear' and 'delete_model',
    # so they must be present (with no-op values) even in update mode.
    populate({
        "all": True,
        "update": True,
        "data_dir": None,
        "clear": False,
        "delete_model": None,
    })
    logger.info('Recreating networks...')
    make_graphs()
    logger.info('Done.')
......@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then
#if ! test -f "$file"; then
# sh scripts/import-data.sh
python3 manage.py makemigrations drugstone
python3 manage.py migrate
......@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all
python3 manage.py make_graphs
touch $file
fi
#fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment