diff --git a/docker-compose.yml b/docker-compose.yml index 39ae760767f0173a0ae4a059845f97a736a091ac..3a1954650eb74eacef931270884db25083654b9f 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,8 +11,8 @@ services: env_file: - 'docker-django.env.dev' restart: always - volumes: - - ./:/usr/src/drugstone/ +# volumes: +# - ./:/usr/src/drugstone/ ports: - 8001:8000 networks: @@ -60,8 +60,8 @@ services: hostname: drugstone_celery env_file: - './docker-django.env.dev' - volumes: - - ./:/usr/src/drugstone/ +# volumes: +# - ./:/usr/src/drugstone/ depends_on: - redis - db @@ -76,8 +76,8 @@ services: hostname: drugstone_celery_beat env_file: - './docker-django.env.dev' - volumes: - - ./:/usr/src/drugstone/ +# volumes: +# - ./:/usr/src/drugstone/ depends_on: - redis - db diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py new file mode 100644 index 0000000000000000000000000000000000000000..39d0001ba5f0aabc89a46651b3466f187c025559 --- /dev/null +++ b/drugstone/management/commands/import_from_nedrex.py @@ -0,0 +1,68 @@ +# from collections import defaultdict +# +# +# def import_proteins(): +# import python_nedrex as nedrex +# from python_nedrex.core import get_nodes, get_api_key, get_edges +# from models import Protein +# +# def iter_node_collection(coll_name, eval): +# offset = 0 +# limit = 10000 +# while True: +# result = get_nodes(coll_name, offset=offset, limit=limit) +# if not result: +# return +# for node in result: +# eval(node) +# offset += limit +# +# def iter_edge_collection(coll_name, eval): +# offset = 0 +# limit = 10000 +# while True: +# result = get_edges(coll_name, offset=offset, limit=limit) +# if not result: +# return +# for edge in result: +# eval(edge) +# offset += limit +# +# def add_protein(node): +# global proteins +# id = node['primaryDomainId'] +# proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName']) +# +# def add_edges(edge): +# global proteins +# id = edge['sourceDomainId'] +# protein = proteins[id] +# protein.entrez = edge['targetDomainId'].split('.')[1] +# global gene_to_prots +# gene_to_prots[edge['targetDomainId']].add(id) +# +# def add_genes(node): +# global proteins +# global gene_to_prots +# id = node['primaryDomainId'] +# for prot_id in gene_to_prots[id]: +# protein = proteins[prot_id] +# try: +# protein.protein_name = node['synonyms'][0] +# except: +# pass +# +# nedrex.config.set_url_base("http://82.148.225.92:8123/") +# api_key = get_api_key(accept_eula=True) +# nedrex.config.set_api_key(api_key) +# +# proteins = dict() +# gene_to_prots = defaultdict(lambda: set()) +# +# print('Importing Proteins') +# iter_node_collection('protein', add_protein) +# print('Importing Protein-Gene mapping') +# iter_edge_collection('protein_encoded_by_gene', add_edges) +# print('Mapping Gene information') +# iter_node_collection('gene', add_genes) +# Protein.objects.bulk_create(proteins.values()) diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py index 3d7578fe0e59684ec6ba33038fa3618361dcfa69..b2dd21a4bb528797563073f4d1b9665b17b30e23 100755 --- a/drugstone/management/commands/populate_db.py +++ b/drugstone/management/commands/populate_db.py @@ -74,47 +74,6 @@ class DatabasePopulator: print('Done!\n') - # def populate_protein_model(self): - # print('Populating Protein model ...') - # protein_df = pd.read_csv(f'{self.data_dir}/data_drugstone/{self.protein_file}', delimiter='\t') - # for _, row in protein_df.iterrows(): - # protein_ac = row['protein_ac'] - # gene_name = row['gene_name'] - # protein_name = row['protein_name'] - - # if gene_name == 'None': - # gene_name = '' - - # protein_object = Protein(uniprot_code=protein_ac, gene=gene_name, protein_name=protein_name) - # protein_object.save() - - # print('Done!\n') - - # def populate_pdi_model(self): - # print('Populating ProteinDrugInteraction model ...') - # pdi_df = pd.read_csv(f'{self.data_dir}/data_drugstone/{self.pdi_file}', delimiter='\t') - - # for _, row in pdi_df.iterrows(): - # protein_ac = row['protein_ac'] - # drug_id = row['drug_id'] - - # try: - # protein_object = Protein.objects.get(uniprot_code=protein_ac) - # except Protein.DoesNotExist: - # print(f'Protein AC {protein_ac} not found in Protein model!') - # continue - - # try: - # drug_object = Drug.objects.get(drug_id=drug_id) - # except Drug.DoesNotExist: - # print(f'Drug ID {drug_id} not found in Drug model!') - # continue - - # # insert protein-drug to PDI model - # pdi_object = ProteinDrugInteraction(protein=protein_object, drug=drug_object) - # pdi_object.save() - - # print('Done!\n') def populate_exp_model(self): print('Populating Tissue and ExpressionLevel model ...') @@ -146,32 +105,6 @@ class DatabasePopulator: print(f'Added {proteins_linked} expression levels!\n') - # def populate_ppi_model(self): - # print('Populating ProteinProteinInteraction model ...') - # pdi_df = pd.read_csv(f'{self.data_dir}/data_drugstone/{self.ppi_file}', delimiter='\t') - - # for _, row in pdi_df.iterrows(): - - # from_protein_ac = row['from_protein_ac'] - # to_protein_ac = row['to_protein_ac'] - - # try: - # from_protein_object = Protein.objects.get(uniprot_code=from_protein_ac) - # except Protein.DoesNotExist: - # print(f'Protein AC {from_protein_ac} not found in Protein model!') - # continue - - # try: - # to_protein_object = Protein.objects.get(uniprot_code=to_protein_ac) - # except Protein.DoesNotExist: - # print(f'Protein AC {to_protein_ac} not found in Protein model!') - # continue - - # # insert protein-protein edge to ProteinProteinInteraction model - # ppi_object = ProteinProteinInteraction(from_protein=from_protein_object, to_protein=to_protein_object) - # ppi_object.save() - - # print('Done!\n') class Command(BaseCommand): diff --git a/drugstone/management/includes/DataPopulator.py b/drugstone/management/includes/DataPopulator.py index 9d9388902713b84a1a28cafc624cc5e98d90036d..c7dac555b0f08b3f28b5556d79b435364e18220b 100755 --- a/drugstone/management/includes/DataPopulator.py +++ b/drugstone/management/includes/DataPopulator.py @@ -12,17 +12,16 @@ class DataPopulator: int: Count of how many proteins were added """ df = DataLoader.load_proteins() - count = 0 + proteins = list() for _, row in df.iterrows(): - _, created = models.Protein.objects.update_or_create( + proteins.append(models.Protein( uniprot_code=row['protein_ac'], gene=row['gene_name'], entrez=row['entrez_id'], - defaults={'protein_name': row['protein_name']} + protein_name=row['protein_name']) ) - if created: - count += 1 - return count + models.Protein.objects.bulk_create(proteins) + return len(proteins) def populate_disorders() -> int: """ Populates the Disorder table in the django database. @@ -32,17 +31,15 @@ class DataPopulator: int: Count of how many disorders were added """ df = DataLoader.load_disorders() - count = 0 + bulk = list() for _, row in df.iterrows(): - _, created = models.Disorder.objects.update_or_create( + bulk.append(models.Disorder( mondo_id=row['mondo_id'], label=row['label'], - icd10=row['icd10'], - defaults={'label': row['label']} - ) - if created: - count += 1 - return count + icd10=row['icd10'] + )) + models.Disorder.objects.bulk_create(bulk) + return len(bulk) def populate_ensg() -> int: """ Populates the Ensembl-Gene table in the django database. @@ -53,14 +50,13 @@ class DataPopulator: int: Count of how many ensg-protein relations were added """ data = DataLoader.load_ensg() - count = 0 + bulk = list() for entrez, ensg_list in data.items(): protein = models.Protein.objects.get(entrez=entrez) for ensg in ensg_list: - _, created = models.EnsemblGene.objects.get_or_create(name=ensg, protein=protein) - if created: - count += 1 - return count + bulk.append(models.EnsemblGene(name=ensg, protein=protein)) + models.EnsemblGene.objects.bulk_create(bulk) + return len(bulk) def populate_ppi_string() -> int: """ Populates the Protein-Protein-Interactions from STRINGdb @@ -74,8 +70,8 @@ class DataPopulator: name='STRING', link='https://string-db.org/', version='11.0' - ) - count = 0 + ) + bulk = list() for _, row in df.iterrows(): try: # try fetching proteins @@ -85,17 +81,16 @@ class DataPopulator: # continue if not found continue try: - _, created = models.ProteinProteinInteraction.objects.get_or_create( + bulk.append(models.ProteinProteinInteraction( ppi_dataset=dataset, from_protein=protein_a, to_protein=protein_b - ) - if created: - count += 1 + )) except models.ValidationError: # duplicate continue - return count + models.ProteinProteinInteraction.objects.bulk_create(bulk) + return len(bulk) def populate_ppi_apid() -> int: """ Populates the Protein-Protein-Interactions from Apid @@ -109,8 +104,8 @@ class DataPopulator: name='APID', link='http://cicblade.dep.usal.es:8080/APID/', version='January 2019' - ) - count = 0 + ) + bulk = list() for _, row in df.iterrows(): try: # try fetching proteins @@ -120,17 +115,16 @@ class DataPopulator: # continue if not found continue try: - _, created = models.ProteinProteinInteraction.objects.get_or_create( + bulk.append(models.ProteinProteinInteraction( ppi_dataset=dataset, from_protein=protein_a, to_protein=protein_b - ) - if created: - count += 1 + )) except models.ValidationError: # duplicate continue - return count + models.ProteinProteinInteraction.objects.bulk_create(bulk) + return len(bulk) def populate_ppi_biogrid() -> int: """ Populates the Protein-Protein-Interactions from BioGRID @@ -144,8 +138,8 @@ class DataPopulator: name='BioGRID', link='https://thebiogrid.org/', version='4.0' - ) - count = 0 + ) + bulk = list() for _, row in df.iterrows(): try: # try fetching proteins @@ -155,17 +149,16 @@ class DataPopulator: # continue if not found continue try: - _, created = models.ProteinProteinInteraction.objects.get_or_create( + bulk.append(models.ProteinProteinInteraction( ppi_dataset=dataset, from_protein=protein_a, to_protein=protein_b - ) - if created: - count += 1 + )) except models.ValidationError: # duplicate continue - return count + models.ProteinProteinInteraction.objects.bulk_create(bulk) + return len(bulk) def populate_pdi_chembl() -> int: """ Populates the Protein-Drug-Interactions from Chembl @@ -179,9 +172,9 @@ class DataPopulator: name='ChEMBL', link='https://www.ebi.ac.uk/chembl/', version='27', - ) - count = 0 - for index, row in df.iterrows(): + ) + bulk = list() + for _, row in df.iterrows(): try: # try fetching protein protein = models.Protein.objects.get(uniprot_code=row['protein_ac']) @@ -194,14 +187,13 @@ class DataPopulator: except models.Drug.DoesNotExist: # continue if not found continue - _, created = models.ProteinDrugInteraction.objects.get_or_create( + bulk.append(models.ProteinDrugInteraction( pdi_dataset=dataset, protein=protein, drug=drug - ) - if created: - count += 1 - return count + )) + models.ProteinProteinInteraction.objects.bulk_create(bulk) + return len(bulk) def populate_pdis_disgenet() -> int: """ Populates the Protein-Disorder-Interactions from DisGeNET @@ -215,9 +207,9 @@ class DataPopulator: name='DisGeNET', link='https://www.disgenet.org/home/', version='6.0', - ) - count = 0 - for index, row in df.iterrows(): + ) + bulk = list() + for _, row in df.iterrows(): try: # try fetching protein protein = models.Protein.objects.get(uniprot_code=row['protein_name']) @@ -230,15 +222,14 @@ class DataPopulator: except models.Disorder.DoesNotExist: # continue if not found continue - _, created = models.ProteinDisorderAssociation.objects.get_or_create( + bulk.append(models.ProteinDisorderAssociation( pdis_dataset=dataset, protein=protein, disorder=disorder, score=row['score'] - ) - if created: - count += 1 - return count + )) + models.ProteinDisorderAssociation.objects.bulk_create(bulk) + return len(bulk) def populate_drdis_drugbank() -> int: """ Populates the Drug-Disorder-Indications from DrugBank @@ -253,8 +244,8 @@ class DataPopulator: link='https://go.drugbank.com/', version='5.1.8', ) - count = 0 - for index, row in df.iterrows(): + bulk = list() + for _, row in df.iterrows(): try: # try fetching protein drug = models.Drug.objects.get(drug_id=row['drugbank_id']) @@ -267,14 +258,13 @@ class DataPopulator: except models.Disorder.DoesNotExist: # continue if not found continue - _, created = models.DrugDisorderIndication.objects.get_or_create( + bulk.append(models.DrugDisorderIndication( drdi_dataset=dataset, drug=drug, disorder=disorder, - ) - if created: - count += 1 - return count + )) + models.DrugDisorderIndication.objects.bulk_create(bulk) + return len(bulk) def populate_pdi_dgidb() -> int: """ Populates the Protein-Drug-Interactions from DGIdb @@ -288,8 +278,8 @@ class DataPopulator: name='DGIdb', link='https://www.dgidb.org/', version='4.2.0' - ) - count = 0 + ) + bulk = list() for _, row in df.iterrows(): try: # try fetching protein @@ -303,14 +293,13 @@ class DataPopulator: except models.Drug.DoesNotExist: # continue if not found continue - _, created = models.ProteinDrugInteraction.objects.get_or_create( + bulk.append(models.ProteinDrugInteraction( pdi_dataset=dataset, protein=protein, drug=drug - ) - if created: - count += 1 - return count + )) + models.ProteinDrugInteraction.objects.bulk_create(bulk) + return len(bulk) def populate_pdi_drugbank() -> int: """ Populates the Protein-Drug-Interactions from Drugbank @@ -324,8 +313,8 @@ class DataPopulator: name='DrugBank', link='https://go.drugbank.com/', version='5.1.7' - ) - count = 0 + ) + bulk = list() for _, row in df.iterrows(): try: # try fetching protein @@ -339,11 +328,10 @@ class DataPopulator: except models.Drug.DoesNotExist: # continue if not found continue - _, created = models.ProteinDrugInteraction.objects.get_or_create( + bulk.append(models.ProteinDrugInteraction( pdi_dataset=dataset, protein=protein, drug=drug - ) - if created: - count += 1 - return count + )) + models.ProteinDrugInteraction.objects.bulk_create(bulk) + return len(bulk) diff --git a/drugstone/settings/celery_schedule.py b/drugstone/settings/celery_schedule.py index 7bab2c532243d229dc4d79985e20e1495eb99ebe..de5a78a072248088a319b48a8149e2359a5e9066 100644 --- a/drugstone/settings/celery_schedule.py +++ b/drugstone/settings/celery_schedule.py @@ -3,6 +3,6 @@ from celery.schedules import crontab CELERY_BEAT_SCHEDULE = { 'update_db': { 'task': 'drugstone.tasks.task_update_db_from_nedrex', - 'schedule': crontab(minute='*/1000'), + 'schedule': crontab(minute='*/1'), }, } diff --git a/drugstone/settings/settings.py b/drugstone/settings/settings.py index 3e4ee44d27010ad160d4b2b0036f0a508aba05d5..6a56d2fadaabe96ea8a7f7db3d0ace35897ec6ff 100755 --- a/drugstone/settings/settings.py +++ b/drugstone/settings/settings.py @@ -24,7 +24,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) SECRET_KEY = os.environ.get('SECRET_KEY') # SECURITY WARNING: don't run with debug turned on in production! -DEBUG = os.environ.get('DEBUG', False) +DEBUG = os.environ.get('DEBUG') == '1' ALLOWED_HOSTS = [ 'localhost', @@ -45,6 +45,7 @@ INSTALLED_APPS = [ 'django.contrib.staticfiles', 'corsheaders', 'drugstone', + # 'python_nedrex', 'rest_framework', ] diff --git a/drugstone/tasks.py b/drugstone/tasks.py index b3df13804602d7b7fcc7cb954cb4f2f52edc56eb..74511629381312164e31a5c8990f90b036fbc25b 100644 --- a/drugstone/tasks.py +++ b/drugstone/tasks.py @@ -11,8 +11,8 @@ def task_update_db_from_nedrex(): print('here') logger.info('Fetching data...') - fetch_nedrex_data() + # fetch_nedrex_data() logger.info('Integrating data...') - integrate_nedrex_data() + # integrate_nedrex_data() logger.info('Done.')