Skip to content
Snippets Groups Projects
Commit 1602a919 authored by AndiMajore
Browse files

added update and autoupdate functions

parent c54d91df
No related branches found
No related tags found
No related merge requests found
Pipeline #12005 failed
...@@ -107,7 +107,6 @@ class NedrexImporter: ...@@ -107,7 +107,6 @@ class NedrexImporter:
proteins = with_entrez proteins = with_entrez
iter_node_collection('gene', add_genes) iter_node_collection('gene', add_genes)
# TODO test updating ideas
if update: if update:
(updates, creates) = identify_updates(proteins, self.cache.proteins) (updates, creates) = identify_updates(proteins, self.cache.proteins)
...@@ -116,6 +115,8 @@ class NedrexImporter: ...@@ -116,6 +115,8 @@ class NedrexImporter:
models.Protein.objects.bulk_create(creates) models.Protein.objects.bulk_create(creates)
for protein in creates: for protein in creates:
self.cache.proteins[protein.uniprot_code] = protein self.cache.proteins[protein.uniprot_code] = protein
self.cache.protein_updates.add(protein.uniprot_code)
return len(creates)
else: else:
models.Protein.objects.bulk_create(proteins.values()) models.Protein.objects.bulk_create(proteins.values())
self.cache.proteins = proteins self.cache.proteins = proteins
...@@ -132,14 +133,16 @@ class NedrexImporter: ...@@ -132,14 +133,16 @@ class NedrexImporter:
iter_node_collection('drug', add_drug) iter_node_collection('drug', add_drug)
# TODO test updating ideas
if update: if update:
(updates, creates) = identify_updates(drugs, self.cache.drugs) (updates, creates) = identify_updates(drugs, self.cache.drugs)
for u in updates: for u in updates:
u.save() u.save()
models.Drug.objects.bulk_create(creates) models.Drug.objects.bulk_create(creates)
for drug in creates: for drug in creates:
self.cache.drug_updates.add(drug.drug_id)
self.cache.drugs[drug.drug_id] = drug self.cache.drugs[drug.drug_id] = drug
return len(creates)
else: else:
models.Drug.objects.bulk_create(drugs.values()) models.Drug.objects.bulk_create(drugs.values())
self.cache.drugs = drugs self.cache.drugs = drugs
...@@ -157,14 +160,15 @@ class NedrexImporter: ...@@ -157,14 +160,15 @@ class NedrexImporter:
iter_node_collection('disorder', add_disorder) iter_node_collection('disorder', add_disorder)
# TODO test updating ideas
if update: if update:
(updates, creates) = identify_updates(disorders, self.cache.disorders) (updates, creates) = identify_updates(disorders, self.cache.disorders)
for u in updates: for u in updates:
u.save() u.save()
models.Disorder.objects.bulk_create(creates) models.Disorder.objects.bulk_create(creates)
for disorder in creates: for disorder in creates:
self.cache.disorder_updates.add(disorder.mondo_id)
self.cache.disorders[disorder.mondo_id] = disorder self.cache.disorders[disorder.mondo_id] = disorder
return len(creates)
else: else:
models.Disorder.objects.bulk_create(disorders.values()) models.Disorder.objects.bulk_create(disorders.values())
self.cache.disorders = disorders self.cache.disorders = disorders
...@@ -175,18 +179,14 @@ class NedrexImporter: ...@@ -175,18 +179,14 @@ class NedrexImporter:
self.cache.init_drugs() self.cache.init_drugs()
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
bulk = set() bulk = set()
def add_dpi(edge): def add_dpi(edge):
try: try:
bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
drug=self.cache.get_drug_by_drugbank( protein = self.cache.get_protein_by_uniprot(to_id(edge['targetDomainId']))
to_id(edge['sourceDomainId'])), if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_protein(protein)):
protein=self.cache.get_protein_by_uniprot( bulk.add(models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein))
to_id(edge['targetDomainId']))))
except KeyError: except KeyError:
pass pass
...@@ -197,9 +197,6 @@ class NedrexImporter: ...@@ -197,9 +197,6 @@ class NedrexImporter:
def import_protein_protein_interactions(self, dataset, update): def import_protein_protein_interactions(self, dataset, update):
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
bulk = list() bulk = list()
def iter_ppi(eval): def iter_ppi(eval):
...@@ -216,11 +213,11 @@ class NedrexImporter: ...@@ -216,11 +213,11 @@ class NedrexImporter:
def add_ppi(edge): def add_ppi(edge):
try: try:
bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, protein1 = self.cache.get_protein_by_uniprot(to_id(edge['memberOne']))
from_protein=self.cache.get_protein_by_uniprot( protein2 = self.cache.get_protein_by_uniprot(to_id(edge['memberTwo']))
to_id(edge['memberOne'])), if not update or (self.cache.is_new_protein(protein1) or self.cache.is_new_protein(protein2)):
to_protein=self.cache.get_protein_by_uniprot( bulk.append(models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1,
to_id(edge['memberTwo'])))) to_protein=protein2))
except KeyError: except KeyError:
pass pass
...@@ -232,17 +229,14 @@ class NedrexImporter: ...@@ -232,17 +229,14 @@ class NedrexImporter:
self.cache.init_disorders() self.cache.init_disorders()
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
bulk = set() bulk = set()
def add_pdis(edge): def add_pdis(edge):
try: try:
disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId'])) disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])): for protein in self.cache.get_proteins_by_entrez(to_id(edge['sourceDomainId'])):
bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, if not update or (self.cache.is_new_disease(disorder) or self.cache.is_new_protein(protein)):
protein=protein, bulk.add(models.ProteinDisorderAssociation(pdis_dataset=dataset, protein=protein,
disorder=disorder, score=edge['score'])) disorder=disorder, score=edge['score']))
except KeyError: except KeyError:
pass pass
...@@ -255,18 +249,14 @@ class NedrexImporter: ...@@ -255,18 +249,14 @@ class NedrexImporter:
self.cache.init_disorders() self.cache.init_disorders()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
bulk = set() bulk = set()
def add_drdis(edge): def add_drdis(edge):
try: try:
bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug = self.cache.get_drug_by_drugbank(to_id(edge['sourceDomainId']))
drug=self.cache.get_drug_by_drugbank( disorder = self.cache.get_disorder_by_mondo(to_id(edge['targetDomainId']))
to_id(edge['sourceDomainId'])), if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
disorder=self.cache.get_disorder_by_mondo( bulk.add(models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder))
to_id(edge['targetDomainId']))))
except KeyError: except KeyError:
pass pass
......
...@@ -187,6 +187,9 @@ class Command(BaseCommand): ...@@ -187,6 +187,9 @@ class Command(BaseCommand):
pass pass
def handle(self, *args, **kwargs):
    """Run the command by delegating to ``run``.

    The positional and keyword arguments are accepted but not forwarded.
    """
    run()
def run():
ppi_datasets = models.PPIDataset.objects.all() ppi_datasets = models.PPIDataset.objects.all()
ppi_datasets_names = [e.name for e in ppi_datasets] ppi_datasets_names = [e.name for e in ppi_datasets]
......
...@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache ...@@ -12,6 +12,8 @@ from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader from drugstone.management.includes import DatasetLoader
class DatabasePopulator: class DatabasePopulator:
def __init__(self, data_dir): def __init__(self, data_dir):
self.data_dir = data_dir self.data_dir = data_dir
...@@ -82,6 +84,10 @@ class Command(BaseCommand): ...@@ -82,6 +84,10 @@ class Command(BaseCommand):
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications') parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
def handle(self, *args, **kwargs):
    """Run the command by handing the parsed options dict to ``populate``.

    Positional arguments are accepted but not forwarded.
    """
    populate(kwargs)
def populate(kwargs):
nedrex_api_url = "http://82.148.225.92:8123/" nedrex_api_url = "http://82.148.225.92:8123/"
data_dir = kwargs['data_dir'] data_dir = kwargs['data_dir']
...@@ -152,7 +158,9 @@ class Command(BaseCommand): ...@@ -152,7 +158,9 @@ class Command(BaseCommand):
if kwargs['protein_drug']: if kwargs['protein_drug']:
print('Importing PDIs from NeDRexDB...') print('Importing PDIs from NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer, DatasetLoader.get_drug_target_nedrex(nedrex_api_url), update) n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url),
update)
print(f'Imported {n} PDIs from NeDRexDB') print(f'Imported {n} PDIs from NeDRexDB')
print('Populating PDIs from Chembl...') print('Populating PDIs from Chembl...')
...@@ -170,7 +178,8 @@ class Command(BaseCommand): ...@@ -170,7 +178,8 @@ class Command(BaseCommand):
if kwargs['protein_disorder']: if kwargs['protein_disorder']:
print('Importing PDis from NeDRexDB...') print('Importing PDis from NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer, n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(nedrex_api_url), DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url),
update) update)
print(f'Imported {n} PDis from NeDRexDB') print(f'Imported {n} PDis from NeDRexDB')
print('Populating PDis associations from DisGeNET...') print('Populating PDis associations from DisGeNET...')
......
...@@ -9,19 +9,13 @@ class DataPopulator: ...@@ -9,19 +9,13 @@ class DataPopulator:
self.cache = cache self.cache = cache
def populate_expressions(self, update): def populate_expressions(self, update):
if update:
models.ExpressionLevel.objects.all().delete()
self.cache.init_proteins() self.cache.init_proteins()
df = DataLoader.load_expressions() df = DataLoader.load_expressions()
tissues_models = dict() tissues_models = dict()
for tissue_name in df.columns.values[2:]: for tissue_name in df.columns.values[2:]:
try: tissues_models[tissue_name] = models.Tissue.objects.get_or_create(name=tissue_name)
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
proteins_linked = 0 proteins_linked = 0
bulk = set() bulk = set()
...@@ -33,7 +27,7 @@ class DataPopulator: ...@@ -33,7 +27,7 @@ class DataPopulator:
for protein_model in self.cache.get_proteins_by_gene(gene_name): for protein_model in self.cache.get_proteins_by_gene(gene_name):
proteins_linked += 1 proteins_linked += 1
if not update or self.cache.is_new_protein(protein_model):
for tissue_name, tissue_model in tissues_models.items(): for tissue_name, tissue_model in tissues_models.items():
expr = models.ExpressionLevel(protein=protein_model, expr = models.ExpressionLevel(protein=protein_model,
tissue=tissue_model, tissue=tissue_model,
...@@ -59,8 +53,6 @@ class DataPopulator: ...@@ -59,8 +53,6 @@ class DataPopulator:
Returns: Returns:
int: Count of how many ensg-protein relations were added int: Count of how many ensg-protein relations were added
""" """
if update:
models.EnsemblGene.objects.all().delete()
self.cache.init_proteins() self.cache.init_proteins()
data = DataLoader.load_ensg() data = DataLoader.load_ensg()
bulk = list() bulk = list()
...@@ -69,6 +61,7 @@ class DataPopulator: ...@@ -69,6 +61,7 @@ class DataPopulator:
proteins = self.cache.get_proteins_by_entrez(entrez) proteins = self.cache.get_proteins_by_entrez(entrez)
for protein in proteins: for protein in proteins:
for ensg in ensg_list: for ensg in ensg_list:
if not update or self.cache.is_new_protein(protein):
bulk.append(models.EnsemblGene(name=ensg, protein=protein)) bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk) models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -81,8 +74,6 @@ class DataPopulator: ...@@ -81,8 +74,6 @@ class DataPopulator:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_string() df = DataLoader.load_ppi_string()
bulk = list() bulk = list()
...@@ -92,19 +83,15 @@ class DataPopulator: ...@@ -92,19 +83,15 @@ class DataPopulator:
proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a']) proteins_a = self.cache.get_proteins_by_entrez(row['entrez_a'])
proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b']) proteins_b = self.cache.get_proteins_by_entrez(row['entrez_b'])
except KeyError: except KeyError:
# continue if not found
continue continue
for protein_a in proteins_a: for protein_a in proteins_a:
for protein_b in proteins_b: for protein_b in proteins_b:
try: if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction( bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset, ppi_dataset=dataset,
from_protein=protein_a, from_protein=protein_a,
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -117,8 +104,6 @@ class DataPopulator: ...@@ -117,8 +104,6 @@ class DataPopulator:
""" """
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_apid() df = DataLoader.load_ppi_apid()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -129,14 +114,12 @@ class DataPopulator: ...@@ -129,14 +114,12 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
try: if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.add(models.ProteinProteinInteraction( bulk.add(models.ProteinProteinInteraction(
ppi_dataset=dataset, ppi_dataset=dataset,
from_protein=protein_a, from_protein=protein_a,
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError:
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -149,8 +132,6 @@ class DataPopulator: ...@@ -149,8 +132,6 @@ class DataPopulator:
""" """
self.cache.init_proteins() self.cache.init_proteins()
if update:
models.ProteinProteinInteraction.objects.filter(ppi_dataset=dataset).delete()
df = DataLoader.load_ppi_biogrid() df = DataLoader.load_ppi_biogrid()
bulk = list() bulk = list()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -164,15 +145,12 @@ class DataPopulator: ...@@ -164,15 +145,12 @@ class DataPopulator:
continue continue
for protein_a in proteins_a: for protein_a in proteins_a:
for protein_b in proteins_b: for protein_b in proteins_b:
try: if not update or (self.cache.is_new_protein(protein_a) or self.cache.is_new_protein(protein_b)):
bulk.append(models.ProteinProteinInteraction( bulk.append(models.ProteinProteinInteraction(
ppi_dataset=dataset, ppi_dataset=dataset,
from_protein=protein_a, from_protein=protein_a,
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError:
# duplicate
continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
...@@ -186,8 +164,6 @@ class DataPopulator: ...@@ -186,8 +164,6 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_chembl() df = DataLoader.load_pdi_chembl()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -202,6 +178,7 @@ class DataPopulator: ...@@ -202,6 +178,7 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction( bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset, pdi_dataset=dataset,
protein=protein, protein=protein,
...@@ -220,9 +197,6 @@ class DataPopulator: ...@@ -220,9 +197,6 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_disorders() self.cache.init_disorders()
if update:
models.ProteinDisorderAssociation.objects.filter(pdis_dataset=dataset).delete()
df = DataLoader.load_pdis_disgenet() df = DataLoader.load_pdis_disgenet()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
...@@ -238,6 +212,7 @@ class DataPopulator: ...@@ -238,6 +212,7 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_disease(disorder)):
bulk.add(models.ProteinDisorderAssociation( bulk.add(models.ProteinDisorderAssociation(
pdis_dataset=dataset, pdis_dataset=dataset,
protein=protein, protein=protein,
...@@ -256,8 +231,6 @@ class DataPopulator: ...@@ -256,8 +231,6 @@ class DataPopulator:
""" """
self.cache.init_drugs() self.cache.init_drugs()
self.cache.init_disorders() self.cache.init_disorders()
if update:
models.DrugDisorderIndication.objects.filter(drdi_dataset=dataset).delete()
df = DataLoader.load_drdis_drugbank() df = DataLoader.load_drdis_drugbank()
bulk = set() bulk = set()
...@@ -274,6 +247,7 @@ class DataPopulator: ...@@ -274,6 +247,7 @@ class DataPopulator:
except KeyError: except KeyError:
# continue if not found # continue if not found
continue continue
if not update or (self.cache.is_new_drug(drug) or self.cache.is_new_disease(disorder)):
bulk.add(models.DrugDisorderIndication( bulk.add(models.DrugDisorderIndication(
drdi_dataset=dataset, drdi_dataset=dataset,
drug=drug, drug=drug,
...@@ -292,24 +266,19 @@ class DataPopulator: ...@@ -292,24 +266,19 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_dgidb() df = DataLoader.load_pdi_dgidb()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id']) proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
try: try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id']) drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
for protein in proteins: for protein in proteins:
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction( bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset, pdi_dataset=dataset,
protein=protein, protein=protein,
...@@ -328,25 +297,19 @@ class DataPopulator: ...@@ -328,25 +297,19 @@ class DataPopulator:
self.cache.init_proteins() self.cache.init_proteins()
self.cache.init_drugs() self.cache.init_drugs()
if update:
models.ProteinDrugInteraction.objects.filter(pdi_dataset=dataset).delete()
df = DataLoader.load_pdi_drugbank() df = DataLoader.load_pdi_drugbank()
bulk = set() bulk = set()
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein
proteins = self.cache.get_proteins_by_entrez(row['entrez_id']) proteins = self.cache.get_proteins_by_entrez(row['entrez_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
try: try:
# try fetching drug
drug = self.cache.get_drug_by_drugbank(row['drug_id']) drug = self.cache.get_drug_by_drugbank(row['drug_id'])
except KeyError: except KeyError:
# continue if not found
continue continue
for protein in proteins: for protein in proteins:
if not update or (self.cache.is_new_protein(protein) or self.cache.is_new_drug(drug)):
bulk.add(models.ProteinDrugInteraction( bulk.add(models.ProteinDrugInteraction(
pdi_dataset=dataset, pdi_dataset=dataset,
protein=protein, protein=protein,
......
...@@ -10,6 +10,10 @@ class NodeCache: ...@@ -10,6 +10,10 @@ class NodeCache:
disorders = dict() disorders = dict()
drugs = dict() drugs = dict()
drug_updates = set()
disorder_updates = set()
protein_updates = set()
def init_protein_maps(self): def init_protein_maps(self):
print("Generating protein id maps...") print("Generating protein id maps...")
for protein in self.proteins.values(): for protein in self.proteins.values():
...@@ -20,23 +24,39 @@ class NodeCache: ...@@ -20,23 +24,39 @@ class NodeCache:
if len(self.proteins) == 0: if len(self.proteins) == 0:
print("Generating protein maps...") print("Generating protein maps...")
for protein in models.Protein.objects.all(): for protein in models.Protein.objects.all():
if protein.id < 1000:
protein.delete()
continue
self.proteins[protein.uniprot_code] = protein self.proteins[protein.uniprot_code] = protein
if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0): if len(self.proteins) > 0 and (len(self.entrez_to_uniprot) == 0 or len(self.gene_name_to_uniprot) == 0):
self.init_protein_maps() self.init_protein_maps()
def init_drugs(self):
    """Fill ``self.drugs`` from the database if the cache is still empty.

    Every ``models.Drug`` row is stored under its ``drug_id``. The method only
    reads from the database: the purge of rows with ``id < 1000`` was dropped,
    because warming a cache must not delete stored drugs.
    """
    if self.drugs:
        # Cache already holds entries; keep them untouched.
        return
    print("Generating drug map...")
    for drug in models.Drug.objects.all():
        self.drugs[drug.drug_id] = drug
def init_disorders(self):
    """Fill ``self.disorders`` from the database if the cache is still empty.

    Every ``models.Disorder`` row is stored under its ``mondo_id``. The method
    only reads from the database: the purge of rows with ``id < 1000`` was
    dropped, because warming a cache must not delete stored disorders.
    """
    if self.disorders:
        # Cache already holds entries; keep them untouched.
        return
    print("Generating disorder map...")
    for disorder in models.Disorder.objects.all():
        self.disorders[disorder.mondo_id] = disorder
def is_new_protein(self, protein: models.Protein):
    """Tell whether ``protein`` is recorded in ``self.protein_updates``.

    Membership is decided by the protein's ``uniprot_code``.
    """
    uniprot_code = protein.uniprot_code
    return uniprot_code in self.protein_updates
def is_new_drug(self, drug: models.Drug):
    """Tell whether ``drug`` is recorded in ``self.drug_updates``.

    Membership is decided by the drug's ``drug_id``.
    """
    drug_id = drug.drug_id
    return drug_id in self.drug_updates
def is_new_disease(self, disease: models.Disorder):
    """Tell whether ``disease`` is recorded in ``self.disorder_updates``.

    Membership is decided by the disorder's ``mondo_id``.
    """
    mondo_id = disease.mondo_id
    return mondo_id in self.disorder_updates
def get_protein_by_uniprot(self, uniprot_id):
    """Return the entry of ``self.proteins`` stored under ``uniprot_id``.

    The lookup is a plain subscript, so a missing key propagates whatever
    ``self.proteins`` raises (``KeyError`` for a dict).
    """
    protein = self.proteins[uniprot_id]
    return protein
......
...@@ -84,6 +84,9 @@ class Protein(models.Model): ...@@ -84,6 +84,9 @@ class Protein(models.Model):
def __ne__(self, other):
    """Return the negation of ``__eq__``, or ``NotImplemented`` if it declines.

    When ``__eq__`` returns ``NotImplemented`` the value is passed through
    unchanged instead of being negated, so Python can fall back to the other
    operand's comparison rather than reporting the objects as "not unequal".
    """
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        return NotImplemented
    return not outcome
def __hash__(self):
    """Hash the instance by its ``uniprot_code``, ``gene`` and ``entrez`` values."""
    identity = (self.uniprot_code, self.gene, self.entrez)
    return hash(identity)
def update(self, other): def update(self, other):
self.uniprot_code = other.uniprot_code self.uniprot_code = other.uniprot_code
self.gene = other.gene self.gene = other.gene
......
...@@ -3,6 +3,6 @@ from celery.schedules import crontab ...@@ -3,6 +3,6 @@ from celery.schedules import crontab
# Periodic tasks for Celery beat. The single entry runs the NeDRex database
# update task on the crontab schedule given below.
CELERY_BEAT_SCHEDULE = {
    'update_db': {
        'task': 'drugstone.tasks.task_update_db_from_nedrex',
        'schedule': crontab(day_of_week=1, hour=5, minute=0),
    },
}
from celery import shared_task from celery import shared_task
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from drugstone.util.nedrex import fetch_nedrex_data, integrate_nedrex_data from drugstone.management.commands.populate_db import populate
from drugstone.management.commands.make_graphs import run as make_graphs
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
nedrex_api_url = "http://82.148.225.92:8123/"
@shared_task
def task_update_db_from_nedrex():
    """Refresh the database from NeDRex, then rebuild the networks.

    Calls ``populate`` in update mode followed by ``make_graphs``; each stage
    is announced through the task logger.
    """
    logger.info('Updating DB from NeDRex.')

    logger.info('Updating data...')
    # NOTE(review): only "all" and "update" are supplied, while ``populate``
    # subscripts further option keys such as 'data_dir' - confirm that the
    # missing keys cannot raise KeyError here.
    populate_options = {"all": True, "update": True}
    populate(populate_options)

    logger.info('Recreating networks...')
    make_graphs()

    logger.info('Done.')
...@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock" ...@@ -5,7 +5,7 @@ file="store/docker-entrypoint.lock"
if ! test -f "$file"; then #if ! test -f "$file"; then
# sh scripts/import-data.sh # sh scripts/import-data.sh
python3 manage.py makemigrations drugstone python3 manage.py makemigrations drugstone
python3 manage.py migrate python3 manage.py migrate
...@@ -14,6 +14,6 @@ if ! test -f "$file"; then ...@@ -14,6 +14,6 @@ if ! test -f "$file"; then
python3 manage.py populate_db -u --all python3 manage.py populate_db -u --all
python3 manage.py make_graphs python3 manage.py make_graphs
touch $file touch $file
fi #fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment