Skip to content
Snippets Groups Projects
Commit 71505df5 authored by AndiMajore
Browse files

added update and autoupdate functions

Former-commit-id: 1452720df605af5280396874a2c4f41a06b291ef [formerly c5ffaf4e6fd037c3fd9e0a0f6d71096e6037d754]
Former-commit-id: 0186ba3f7cd4e20538fb0625ac272cae9844a75d
parent 027a9b28
No related branches found
No related tags found
No related merge requests found
...@@ -107,7 +107,6 @@ class NedrexImporter: ...@@ -107,7 +107,6 @@ class NedrexImporter:
proteins = with_entrez proteins = with_entrez
iter_node_collection('gene', add_genes) iter_node_collection('gene', add_genes)
# TODO test updating ideas
if update: if update:
(updates, creates) = identify_updates(proteins, self.cache.proteins) (updates, creates) = identify_updates(proteins, self.cache.proteins)
...@@ -116,6 +115,8 @@ class NedrexImporter: ...@@ -116,6 +115,8 @@ class NedrexImporter:
models.Protein.objects.bulk_create(creates) models.Protein.objects.bulk_create(creates)
for protein in creates: for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
return len(creates)
else: else:
models.Protein.objects.bulk_create(proteins.values()) models.Protein.objects.bulk_create(proteins.values())
self.cache.proteins = proteins self.cache.proteins = proteins
...@@ -132,14 +133,16 @@ class NedrexImporter: ...@@ -132,14 +133,16 @@ class NedrexImporter:
iter_node_collection('drug', add_drug) iter_node_collection('drug', add_drug)
# TODO test updating ideas
if update: if update:
(updates, creates) = identify_updates(drugs, self.cache.drugs) (updates, creates) = identify_updates(drugs, self.cache.drugs)
for u in updates: for u in updates:
u.save() u.save()
models.Drug.objects.bulk_create(creates) models.Drug.objects.bulk_create(creates)
for drug in creates: for drug in creates:
self.cache.drug_updates.add(drug.drug_id)
self.cache.drugs[drug.drug_id] = drug self.cache.drugs[drug.drug_id] = drug
return len(creates)
else: else:
models.Drug.objects.bulk_create(drugs.values()) models.Drug.objects.bulk_create(drugs.values())
self.cache.drugs = drugs self.cache.drugs = drugs
...@@ -157,14 +160,15 @@ class NedrexImporter: ...@@ -157,14 +160,15 @@ class NedrexImporter:
iter_node_collection('disorder', add_disorder) iter_node_collection('disorder', add_disorder)
# TODO test updating ideas
if update: if update:
(updates, creates) = identify_updates(disorders, self.cache.disorders) (updates, creates) = identify_updates(disorders, self.cache.disorders)
for u in updates: for u in updates:
u.save() u.save()
models.Disorder.objects.bulk_create(creates) models.Disorder.objects.bulk_create(creates)
for disorder in creates: for disorder in creates:
self.cache.disorder_updates.add(disorder.mondo_id)
self.cache.disorders[disorder.mondo_id] = disorder self.cache.disorders[disorder.mondo_id] = disorder
return len(creates)
else: else:
models.Disorder.objects.bulk_create(disorders.values()) models.Disorder.objects.bulk_create(disorders.values())
self.cache.disorders = disorders self.cache.disorders = disorders
...@@ -175,18 +179,14 @@ class NedrexImporter: ...@@ -175,18 +179,14 @@ class NedrexImporter:
self.cache.init_drugs() self.cache.init_drugs()
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
bulk = set() bulk = set()
def add_dpi(edge): def add_dpi(edge):
try: try:
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
drug=self.cache.get_drug_by_drugbank( protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
to_id(edge['sourceDomainId'])), if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
protein=self.cache.get_protein_by_uniprot( bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
to_id(edge['targetDomainId']))))
except KeyError: except KeyError:
pass pass
...@@ -197,9 +197,6 @@ class NedrexImporter: ...@@ -197,9 +197,6 @@ class NedrexImporter:
def import_protein_protein_interactions(self, dataset, update): def import_protein_protein_interactions(self, dataset, update):
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
bulk = list() bulk = list()
def iter_ppi(eval): def iter_ppi(eval):
...@@ -216,11 +213,11 @@ class NedrexImporter: ...@@ -216,11 +213,11 @@ class NedrexImporter:
def add_ppi(edge): def add_ppi(edge):
try: try:
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
from_protein=self.cache.get_protein_by_uniprot( protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
to_id(edge['memberOne'])), if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
to_protein=self.cache.get_protein_by_uniprot( bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_id(edge['memberTwo'])))) to_protein=protein2))
except KeyError: except KeyError:
pass pass
...@@ -232,17 +229,14 @@ class NedrexImporter: ...@@ -232,17 +229,14 @@ class NedrexImporter:
self.cache.init_disorders() self.cache.init_disorders()
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
bulk = set() bulk = set()
def add_pdis(edge): def add_pdis(edge):
try: try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId'])) disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])): for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
protein=protein, bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score'])) disorder=disorder, score=edge['score']))
except KeyError: except KeyError:
pass pass
...@@ -255,18 +249,14 @@ class NedrexImporter: ...@@ -255,18 +249,14 @@ class NedrexImporter:
self.cache.init_disorders() self.cache.init_disorders()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
bulk = set() bulk = set()
def add_drdis(edge): def add_drdis(edge):
try: try:
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
drug=self.cache.get_drug_by_drugbank( disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
to_id(edge['sourceDomainId'])), if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
disorder=self.cache.get_disorder_by_mondo( bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
to_id(edge['targetDomainId']))))
except KeyError: except KeyError:
pass pass
......
...@@ -187,6 +187,9 @@ class Command(BaseCommand): ...@@ -187,6 +187,9 @@ class Command(BaseCommand):
pass pass
def handle(self, *args, **kwargs): def handle(self, *args, **kwargs):
run()
def run():
ppi_datasets = models.PPIDataset.objects.all() ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets] ppi_datasets_names = [e.name for e in ppi_datasets]
......
...@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache ...@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader from drugstone.management.includes import DatasetLoader
class DatabasePopulator: class DatabasePopulator:
def __init__(self, data_dir): def __init__(self, data_dir):
self.data_dir = data_dir self.data_dir = data_dir
...@@ -82,6 +84,10 @@ class Command(BaseCommand): ...@@ -82,6 +84,10 @@ class Command(BaseCommand):
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications') parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
def handle(self, *args, **kwargs): def handle(self, *args, **kwargs):
populate(kwargs)
def populate(kwargs):
nedrex_api_url = "http://82.148.225.92:8123/" nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir'] data_dir = kwargs['data_dir']
...@@ -152,7 +158,9 @@ class Command(BaseCommand): ...@@ -152,7 +158,9 @@ class Command(BaseCommand):
if kwargs['protein_drug']: if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...') print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update) n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDIs from NeDRexDB') print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...') print('Populating PDIs from Chembl...')
...@@ -170,7 +178,8 @@ class Command(BaseCommand): ...@@ -170,7 +178,8 @@ class Command(BaseCommand):
if kwargs['protein_disorder']: if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...') print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer, n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url), DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update) update)
print(f'Imported {n} PDis from NeDRexDB') print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...') print('Populating PDis associations from DisGeNET...')
......
...@@ -9,19 +9,13 @@ class DataPopulator: ...@@ -9,19 +9,13 @@ class DataPopulator:
self.cache = cache self.cache = cache
def populate_expressions(self, update): def populate_expressions(self, update):
if update:
models.ExpressionLevel.objects.all().delete()
self.cache.init_proteins() self.cache.init_proteins()
df = DataLoader.load_expressions() df = DataLoader.load_expressions()
tissues_models = dict() tissues_models = dict()
for tissue_name in df.columns.values[2:]: for tissue_name in df.columns.values[2:]:
try: tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
proteins_linked = 0 proteins_linked = 0
bulk = set() bulk = set()
...@@ -33,7 +27,7 @@ class DataPopulator: ...@@ -33,7 +27,7 @@ class DataPopulator:
for protein_model in self.cache.get_proteins_by_gene(gene_name): for protein_model in self.cache.get_proteins_by_gene(gene_name):
proteins_linked += 1 proteins_linked += 1
if not update or self.cache.is_new_protein(protein_model):
for tissue_name, tissue_model in tissues_models.items(): for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model, expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model, tissue=tissue_model,
...@@ -59,8 +53,6 @@ class DataPopulator: ...@@ -59,8 +53,6 @@ class DataPopulator:
Returns: Returns:
int: Count of how many ensg-protein relations were added int: Count of how many ensg-protein relations were added
""" """
if update:
models.EnsemblGene.objects.all().delete()
self.cache.init_proteins() self.cache.init_proteins()
data = DataLoader.load_ensg() data = DataLoader.load_ensg()
bulk = list() bulk = list()
...@@ -69,6 +61,7 @@ class DataPopulator: ...@@ -69,6 +61,7 @@ class DataPopulator:
proteins = self.cache.get_proteins_by_entrez(entrez) proteins = self.cache.get_proteins_by_entrez(entrez)
for protein in proteins: for protein in proteins:
for ensg in ensg_list: for ensg in ensg_list:
if not update or self.cache.is_new_protein(protein):
bulk.append(models.EnsemblGene(name=ensg, protein=protein)) bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk) models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -81,8 +74,6 @@ class DataPopulator: ...@@ -81,8 +74,6 @@ class DataPopulator:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_string() df = DataLoader.load_ppi_string()
bulk = list() bulk = list()
...@@ -92,19 +83,15 @@ class DataPopulator: ...@@ -92,19 +83,15 @@ class DataPopulator:
proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a']) proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b']) proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
except KeyError: except KeyError:
# continue if not found
continue continue
for protein_a in proteins_a: for protein_a in proteins_a:
for protein_b in proteins_b: for protein_b in proteins_b:
try: if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction( bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset, ppi_dataset=dataset,
from_protein=protein_a, from_protein=protein_a,
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -117,8 +104,6 @@ class DataPopulator: ...@@ -117,8 +104,6 @@ class DataPopulator:
""" """
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_apid() df = DataLoader.load_ppi_apid()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -129,14 +114,12 @@ class DataPopulator: ...@@ -129,14 +114,12 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
try: if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.add(models.ProteinProteinInteraction( bulk.add(models.ProteinProteinInteraction(
ppi_dataset=dataset, ppi_dataset=dataset,
from_protein=protein_a, from_protein=protein_a,
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError:
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -149,8 +132,6 @@ class DataPopulator: ...@@ -149,8 +132,6 @@ class DataPopulator:
""" """
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_biogrid() df = DataLoader.load_ppi_biogrid()
bulk = list() bulk = list()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -164,15 +145,12 @@ class DataPopulator: ...@@ -164,15 +145,12 @@ class DataPopulator:
continue continue
for protein_a in proteins_a: for protein_a in proteins_a:
for protein_b in proteins_b: for protein_b in proteins_b:
try: if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction( bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset, ppi_dataset=dataset,
from_protein=protein_a, from_protein=protein_a,
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -186,8 +164,6 @@ class DataPopulator: ...@@ -186,8 +164,6 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_chembl() df = DataLoader.load_pdi_chembl()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -202,6 +178,7 @@ class DataPopulator: ...@@ -202,6 +178,7 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction( bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset, pdi_dataset=dataset,
protein=protein, protein=protein,
...@@ -220,9 +197,6 @@ class DataPopulator: ...@@ -220,9 +197,6 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_disorders() self.cache.init_disorders()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
df = DataLoader.load_pdis_disgenet() df = DataLoader.load_pdis_disgenet()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -238,6 +212,7 @@ class DataPopulator: ...@@ -238,6 +212,7 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
bulk.add(models.ProteinDisorderAssociation( bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset, pdis_dataset=dataset,
protein=protein, protein=protein,
...@@ -256,8 +231,6 @@ class DataPopulator: ...@@ -256,8 +231,6 @@ class DataPopulator:
""" """
self.cache.init_drugs() self.cache.init_drugs()
self.cache.init_disorders() self.cache.init_disorders()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
df = DataLoader.load_drdis_drugbank() df = DataLoader.load_drdis_drugbank()
bulk = set() bulk = set()
...@@ -274,6 +247,7 @@ class DataPopulator: ...@@ -274,6 +247,7 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication( bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset, drdi_dataset=dataset,
drug=drug, drug=drug,
...@@ -292,24 +266,19 @@ class DataPopulator: ...@@ -292,24 +266,19 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_dgidb() df = DataLoader.load_pdi_dgidb()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id']) proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
try: try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id']) drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
for protein in proteins: for protein in proteins:
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction( bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset, pdi_dataset=dataset,
protein=protein, protein=protein,
...@@ -328,25 +297,19 @@ class DataPopulator: ...@@ -328,25 +297,19 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_drugbank() df = DataLoader.load_pdi_drugbank()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id']) proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
try: try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id']) drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
for protein in proteins: for protein in proteins:
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction( bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset, pdi_dataset=dataset,
protein=protein, protein=protein,
......
...@@ -10,6 +10,10 @@ class NodeCache: ...@@ -10,6 +10,10 @@ class NodeCache:
disorders = dict() disorders = dict()
drugs = dict() drugs = dict()
drug_updates = set()
disorder_updates = set()
protein_updates = set()
def init_protein_maps(self): def init_protein_maps(self):
print("Generating protein id maps...") print("Generating protein id maps...")
for protein in self.proteins.values(): for protein in self.proteins.values():
...@@ -20,23 +24,39 @@ class NodeCache: ...@@ -20,23 +24,39 @@ class NodeCache:
if len(self.proteins) == 0: if len(self.proteins) == 0:
print("Generating protein maps...") print("Generating protein maps...")
for protein in models.Protein.objects.all(): for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0): if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps() self.init_protein_maps()
def init_drugs(self): def init_drugs(self):
if len(self.drugs) == 0: if len(self.drugs) == 0:
print("Generating drug map...") print("Generating drug map...")
for drug in models.Drug.objects.all(): for drug in models.Drug.objects.all():
if drug.id < 1000:
drug.delete()
continue
self.drugs[drug.drug_id] = drug self.drugs[drug.drug_id] = drug
def init_disorders(self): def init_disorders(self):
if len(self.disorders) == 0: if len(self.disorders) == 0:
print("Generating disorder map...") print("Generating disorder map...")
for disorder in models.Disorder.objects.all(): for disorder in models.Disorder.objects.all():
if disorder.id < 1000:
disorder.delete()
continue
self.disorders[disorder.mondo_id] = disorder self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein) -> bool:
    """Tell whether ``protein`` is recorded in ``self.protein_updates``.

    The importer adds the ``uniprot_code`` of every protein it creates
    during an update run to ``protein_updates``; the populators use this
    check to build edges only for such nodes when ``update`` is set.

    Args:
        protein: Object exposing a ``uniprot_code`` attribute.

    Returns:
        True if ``protein.uniprot_code`` is in ``self.protein_updates``.
    """
    # NOTE(review): protein_updates appears to be declared at class level,
    # so it would be shared by all NodeCache instances -- confirm.
    return protein.uniprot_code in self.protein_updates
def is_new_drug(self, drug: models.Drug) -> bool:
    """Tell whether ``drug`` is recorded in ``self.drug_updates``.

    The importer adds the ``drug_id`` of every drug it creates during an
    update run to ``drug_updates``; the populators use this check to build
    edges only for such nodes when ``update`` is set.

    Args:
        drug: Object exposing a ``drug_id`` attribute.

    Returns:
        True if ``drug.drug_id`` is in ``self.drug_updates``.
    """
    return drug.drug_id in self.drug_updates
def is_new_disease(self, disease: models.Disorder) -> bool:
    """Tell whether ``disease`` is recorded in ``self.disorder_updates``.

    The importer adds the ``mondo_id`` of every disorder it creates during
    an update run to ``disorder_updates``; the populators use this check to
    build edges only for such nodes when ``update`` is set.

    Args:
        disease: Object exposing a ``mondo_id`` attribute.

    Returns:
        True if ``disease.mondo_id`` is in ``self.disorder_updates``.
    """
    return disease.mondo_id in self.disorder_updates
def get_protein_by_uniprot(self,uniprot_id): def get_protein_by_uniprot(self,uniprot_id):
return self.proteins[uniprot_id] return self.proteins[uniprot_id]
......
...@@ -84,6 +84,9 @@ class Protein(models.Model): ...@@ -84,6 +84,9 @@ class Protein(models.Model):
def __ne__(self, other): def __ne__(self, other):
return not self.__eq__(other) return not self.__eq__(other)
def __hash__(self):
    """Hash the protein by ``(uniprot_code, gene, entrez)``.

    Returns:
        The hash of the tuple of these three field values.
    """
    # These fields are reassigned by update(), so the hash of an instance
    # changes if update() is called while it sits in a set or dict.
    # NOTE(review): should agree with __eq__, which is not visible here --
    # confirm it compares the same fields.
    identity = (self.uniprot_code, self.gene, self.entrez)
    return hash(identity)
def update(self, other): def update(self, other):
self.uniprot_code = other.uniprot_code self.uniprot_code = other.uniprot_code
self.gene = other.gene self.gene = other.gene
......
...@@ -3,6 +3,6 @@ from celery.schedules import crontab ...@@ -3,6 +3,6 @@ from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = { CELERY_BEAT_SCHEDULE = {
'update_db': { 'update_db': {
'task': 'drugstone.tasks.task_update_db_from_nedrex', 'task': 'drugstone.tasks.task_update_db_from_nedrex',
'schedule': crontab(minute='*/1'), 'schedule': crontab(day_of_week=1, hour=5, minute=0),
}, },
} }
from celery import shared_task from celery import shared_task
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
@shared_task @shared_task
def task_update_db_from_nedrex(): def task_update_db_from_nedrex():
logger.info('Updating DB from NeDRex.') logger.info('Updating DB from NeDRex.')
print('here') logger.info('Updating data...')
populate({"all": True, "update": True})
logger.info('Fetching data...') logger.info('Recreating networks...')
# fetch_nedrex_data() make_graphs()
logger.info('Integrating data...')
# integrate_nedrex_data()
logger.info('Done.') logger.info('Done.')
...@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock" ...@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then #if ! test -f "$file"; then
# sh scripts/import-data.sh # sh scripts/import-data.sh
python3 manage.py makemigrations drugstone python3 manage.py makemigrations drugstone
python3 manage.py migrate python3 manage.py migrate
...@@ -14,6 +14,6 @@ if ! test -f "$file"; then ...@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all python3 manage.py populate_db -u --all
python3 manage.py make_graphs python3 manage.py make_graphs
touch $file touch $file
fi #fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment