Skip to content
Snippets Groups Projects
Commit ca0e5a7b authored by AndiMajore's avatar AndiMajore
Browse files

db init cleanup and significant speedup

Former-commit-id: b98d1951d6d94f03ffe97486b492d734a2a4258e [formerly 88e844cb12d9d9e129f9b7f50856d11124a4a475]
Former-commit-id: 66631e2e8e954e5144472bd8ffb170ed4858c30c
parent e46ee3df
No related branches found
No related tags found
No related merge requests found
File moved
This diff is collapsed.
This diff is collapsed.
...@@ -2,7 +2,7 @@ from django.core.management.base import BaseCommand ...@@ -2,7 +2,7 @@ from django.core.management.base import BaseCommand
import pandas as pd import pandas as pd
from django.db import OperationalError, IntegrityError from django.db import OperationalError, IntegrityError
from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset, PDIDataset, Disorder, PDisDataset from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset, PDIDataset, Disorder, PDisDataset, DrDiDataset
from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
from drugstone.management.includes.DataPopulator import DataPopulator from drugstone.management.includes.DataPopulator import DataPopulator
...@@ -57,55 +57,6 @@ class DatabasePopulator: ...@@ -57,55 +57,6 @@ class DatabasePopulator:
self.delete_model(DrDiDataset) self.delete_model(DrDiDataset)
def populate_drug_model(self):
    """Upsert every drug from the tab-separated drug file into the Drug table.

    Reads ``{self.data_dir}/data_drugstone/{self.drug_file}`` with pandas and
    uses the columns ``drug_id``, ``drug_name`` and ``drug_status``.

    Rows are matched on ``drug_id`` only; ``name`` and ``status`` are passed
    through ``defaults`` so an already stored drug is updated in place.
    Passing every field as a lookup keyword meant that a drug whose name or
    status had changed no longer matched its stored row, so no update ever
    happened and Django attempted an insert instead.
    """
    print('Populating Drug model ...')
    drug_df = pd.read_csv(f'{self.data_dir}/data_drugstone/{self.drug_file}', delimiter='\t')
    for _, row in drug_df.iterrows():
        Drug.objects.update_or_create(
            drug_id=row['drug_id'],
            defaults={
                'name': row['drug_name'],
                'status': row['drug_status'],
                # The 'links' column is deliberately not imported (it was
                # already disabled in the previous implementation).
            },
        )
    print('Done!\n')
def populate_exp_model(self):
    """Create Tissue rows and per-protein ExpressionLevel rows from the expression file.

    The tab-separated file ``{self.data_dir}/data_drugstone/{self.exp_file}``
    is read with pandas. Every column from the third one onwards is used as a
    tissue name, and the ``Description`` column supplies the gene name that
    proteins are looked up by.
    """
    print('Populating Tissue and ExpressionLevel model ...')
    table = pd.read_csv(f'{self.data_dir}/data_drugstone/{self.exp_file}', delimiter='\t')

    # Resolve (or create) one Tissue per tissue column before touching rows.
    tissue_by_name = {}
    for column in table.columns.values[2:]:
        try:
            tissue = Tissue.objects.get(name=column)
        except Tissue.DoesNotExist:
            tissue = Tissue.objects.create(name=column)
        tissue_by_name[column] = tissue

    # Incremented once per protein matched to a row, not once per
    # ExpressionLevel that is written.
    matched = 0
    for _, record in table.iterrows():
        gene = record['Description']
        for protein in Protein.objects.filter(gene=gene).all():
            matched += 1
            for column, tissue in tissue_by_name.items():
                try:
                    ExpressionLevel.objects.create(
                        protein=protein,
                        tissue=tissue,
                        expression_level=record[column],
                    )
                except IntegrityError:
                    # Best effort: the error is swallowed so the import
                    # carries on with the next tissue.
                    pass
    print(f'Added {matched} expression levels!\n')
class Command(BaseCommand): class Command(BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
...@@ -139,6 +90,7 @@ class Command(BaseCommand): ...@@ -139,6 +90,7 @@ class Command(BaseCommand):
pp = kwargs['protein_protein'] pp = kwargs['protein_protein']
pd = kwargs['protein_drug'] pd = kwargs['protein_drug']
db_populator = DatabasePopulator(data_dir=data_dir, db_populator = DatabasePopulator(data_dir=data_dir,
# protein_file=protein_file, # protein_file=protein_file,
drug_file=drug_file, drug_file=drug_file,
...@@ -152,8 +104,12 @@ class Command(BaseCommand): ...@@ -152,8 +104,12 @@ class Command(BaseCommand):
db_populator.delete_models(model_list) db_populator.delete_models(model_list)
return return
populator = DataPopulator()
if kwargs['drug_file'] is not None: if kwargs['drug_file'] is not None:
db_populator.populate_drug_model() print('Populating Drugs...')
n = DataPopulator.populate_drugs(populator)
print(f'Populated {n} Drugs.')
# if kwargs['protein_file'] is not None: # if kwargs['protein_file'] is not None:
# db_poulator.populate_protein_model() # db_poulator.populate_protein_model()
...@@ -165,52 +121,54 @@ class Command(BaseCommand): ...@@ -165,52 +121,54 @@ class Command(BaseCommand):
# db_poulator.populate_ppi_model() # db_poulator.populate_ppi_model()
if kwargs['exp_file'] is not None: if kwargs['exp_file'] is not None:
db_populator.populate_exp_model() print('Populating Expressions...')
n = DataPopulator.populate_expessions(populator)
print(f'Populated {n} Expressions.')
if kwargs['proteins'] is not None: if kwargs['proteins'] is not None:
print('Populating Proteins...') print('Populating Proteins...')
n = DataPopulator.populate_proteins() n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.') print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...') print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg() n = DataPopulator.populate_ensg(populator)
print(f'Populated {n} ENSG IDs.') print(f'Populated {n} ENSG IDs.')
if kwargs['disorders'] is not None: if kwargs['disorders'] is not None:
print('Populating Disorders...') print('Populating Disorders...')
n = DataPopulator.populate_disorders() n = DataPopulator.populate_disorders(populator)
print(f'Populated {n} Disorders.') print(f'Populated {n} Disorders.')
if kwargs['protein_protein'] is not None: if kwargs['protein_protein'] is not None:
print('Populating PPIs from STRING...') print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string() n = DataPopulator.populate_ppi_string(populator)
print(f'Populated {n} PPIs from STRING.') print(f'Populated {n} PPIs from STRING.')
print('Populating PPIs from APID...') print('Populating PPIs from APID...')
n = DataPopulator.populate_ppi_apid() n = DataPopulator.populate_ppi_apid(populator)
print(f'Populated {n} PPIs from APID.') print(f'Populated {n} PPIs from APID.')
print('Populating PPIs from BioGRID...') print('Populating PPIs from BioGRID...')
n = DataPopulator.populate_ppi_biogrid() n = DataPopulator.populate_ppi_biogrid(populator)
print(f'Populated {n} PPIs from BioGRID.') print(f'Populated {n} PPIs from BioGRID.')
if kwargs['protein_drug'] is not None: if kwargs['protein_drug'] is not None:
print('Populating PDIs from Chembl...') print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl() n = DataPopulator.populate_pdi_chembl(populator)
print(f'Populated {n} PDIs from Chembl.') print(f'Populated {n} PDIs from Chembl.')
print('Populating PDIs from DGIdb...') print('Populating PDIs from DGIdb...')
n = DataPopulator.populate_pdi_dgidb() n = DataPopulator.populate_pdi_dgidb(populator)
print(f'Populated {n} PDIs from DGIdb.') print(f'Populated {n} PDIs from DGIdb.')
print('Populating PDIs from DrugBank...') print('Populating PDIs from DrugBank...')
n = DataPopulator.populate_pdi_drugbank() n = DataPopulator.populate_pdi_drugbank(populator)
print(f'Populated {n} PDIs from DrugBank.') print(f'Populated {n} PDIs from DrugBank.')
if kwargs['protein_disorder'] is not None: if kwargs['protein_disorder'] is not None:
print('Populating PDis associations from DisGeNET...') print('Populating PDis associations from DisGeNET...')
n=DataPopulator.populate_pdis_disgenet() n=DataPopulator.populate_pdis_disgenet(populator)
print(f'Populated {n} PDis associations from DisGeNET.') print(f'Populated {n} PDis associations from DisGeNET.')
if kwargs['drug_disorder'] is not None: if kwargs['drug_disorder'] is not None:
print('Populating DrDi indications from DrugBank...') print('Populating DrDi indications from DrugBank...')
n=DataPopulator.populate_drdis_drugbank() n=DataPopulator.populate_drdis_drugbank(populator)
print(f'Populated {n} DrDi associations from DrugBank.') print(f'Populated {n} DrDi associations from DrugBank.')
...@@ -4,6 +4,8 @@ import json ...@@ -4,6 +4,8 @@ import json
class DataLoader: class DataLoader:
PATH_PROTEINS = 'data_drugstone/Proteins/' PATH_PROTEINS = 'data_drugstone/Proteins/'
PATH_DRUGS = 'data_drugstone/Drugs/'
PATH_EXPR = 'data_drugstone/'
PATH_DISORDERS = 'data_drugstone/Disorders/' PATH_DISORDERS = 'data_drugstone/Disorders/'
PATH_PDI = 'data_drugstone/PDI/' PATH_PDI = 'data_drugstone/PDI/'
PATH_PPI = 'data_drugstone/PPI/' PATH_PPI = 'data_drugstone/PPI/'
...@@ -16,15 +18,20 @@ class DataLoader: ...@@ -16,15 +18,20 @@ class DataLoader:
# Disorders # Disorders
DISORDERS_MONDO = 'disorders.tsv' DISORDERS_MONDO = 'disorders.tsv'
#Drugs
DRUG_FILE = 'drug-file.txt'
#Expressions
EXPR_FILE = 'gene_tissue_expression.gct'
# Protein-Protein-Interactions # Protein-Protein-Interactions
PPI_APID = 'apid_9606_Q2.txt' PPI_APID = 'apid_9606_Q2.txt'
PPI_BIOGRID = 'BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt' PPI_BIOGRID = 'BIOGRID-ORGANISM-Homo_sapiens-3.5.187.mitab.txt'
PPI_STRING = 'string_interactions.csv' PPI_STRING = 'string_interactions.csv'
# Protein-Drug-Interactions # Protein-Drug-Interactions
PDI_CHEMBL = 'chembl_drug_gene_interactions.csv' PDI_CHEMBL = 'chembl_drug_gene_interactions_uniq.csv'
PDI_DGIDB = 'DGIdb_drug_gene_interactions.csv' PDI_DGIDB = 'DGIdb_drug_gene_interactions.csv'
PDI_DRUGBANK = 'drugbank_drug_gene_interactions.csv' PDI_DRUGBANK = 'drugbank_drug_gene_interactions_uniq.csv'
# Protein-Disorder-Interaction # Protein-Disorder-Interaction
PDi_DISGENET = 'disgenet-protein_disorder_association.tsv' PDi_DISGENET = 'disgenet-protein_disorder_association.tsv'
...@@ -59,6 +66,14 @@ class DataLoader: ...@@ -59,6 +66,14 @@ class DataLoader:
df = pd.read_csv(f'{DataLoader.PATH_PROTEINS}{DataLoader.PROTEINS_COVEX}') df = pd.read_csv(f'{DataLoader.PATH_PROTEINS}{DataLoader.PROTEINS_COVEX}')
df['entrez_id'] = df['entrez_id'].map(DataLoader._clean_entrez) df['entrez_id'] = df['entrez_id'].map(DataLoader._clean_entrez)
return df return df
@staticmethod
def load_drugs() -> pd.DataFrame:
    """Reads the tab-separated drug file into a DataFrame.

    The file location is built from DataLoader.PATH_DRUGS and
    DataLoader.DRUG_FILE.

    Returns:
        pd.DataFrame: the parsed contents of the drug file
    """
    drug_path = f'{DataLoader.PATH_DRUGS}{DataLoader.DRUG_FILE}'
    return pd.read_csv(drug_path, sep='\t')
@staticmethod
def load_expressions() -> pd.DataFrame:
    """Reads the tab-separated expression file into a DataFrame.

    The file location is built from DataLoader.PATH_EXPR and
    DataLoader.EXPR_FILE.

    Returns:
        pd.DataFrame: the parsed contents of the expression file
    """
    expression_path = f'{DataLoader.PATH_EXPR}{DataLoader.EXPR_FILE}'
    return pd.read_csv(expression_path, sep='\t')
@staticmethod @staticmethod
def load_disorders() -> pd.DataFrame: def load_disorders() -> pd.DataFrame:
...@@ -206,7 +221,7 @@ class DataLoader: ...@@ -206,7 +221,7 @@ class DataLoader:
Returns: Returns:
pd.DataFrame: columns "drug_id" and "protein_ac" pd.DataFrame: columns "drug_id" and "protein_ac"
""" """
return pd.read_csv(f'{DataLoader.PATH_PDI}{DataLoader.PDI_CHEMBL}', index_col=0) return pd.read_csv(f'{DataLoader.PATH_PDI}{DataLoader.PDI_CHEMBL}')
@staticmethod @staticmethod
def load_pdis_disgenet() -> pd.DataFrame: def load_pdis_disgenet() -> pd.DataFrame:
...@@ -244,6 +259,6 @@ class DataLoader: ...@@ -244,6 +259,6 @@ class DataLoader:
Returns: Returns:
pd.DataFrame: columns "drug_id" and "entrez_id" pd.DataFrame: columns "drug_id" and "entrez_id"
""" """
df = pd.read_csv(f'{DataLoader.PATH_PDI}{DataLoader.PDI_DRUGBANK}', index_col=0).dropna() df = pd.read_csv(f'{DataLoader.PATH_PDI}{DataLoader.PDI_DRUGBANK}').dropna()
df['entrez_id'] = df['entrez_id'].map(DataLoader._clean_entrez) df['entrez_id'] = df['entrez_id'].map(DataLoader._clean_entrez)
return df return df
from collections import defaultdict
from drugstone.management.includes.DataLoader import DataLoader from drugstone.management.includes.DataLoader import DataLoader
import drugstone.models as models import drugstone.models as models
class DataPopulator: class DataPopulator:
# Lookup caches used by the populate_* / init_* methods.
# NOTE(review): these appear to be declared in the class body (indentation is
# lost in this view); if so, they are shared by every DataPopulator instance.

# entrez id -> Protein model instance
proteins = {}
# uniprot code -> entrez id (the values are entrez ids despite the name)
uniprot_to_ensembl = {}
# gene name -> set of entrez ids; a lookup of an unknown gene yields an empty set
gene_name_to_ensembl = defaultdict(set)
# protein_name_to_ensembl = dict()
# mondo id -> Disorder model instance
disorders = {}
# drug id -> Drug model instance
drugs = {}
def init_proteins(self):
    """Fills the protein lookup maps from the database if they are still empty.

    Populates ``self.proteins`` (entrez id -> Protein),
    ``self.uniprot_to_ensembl`` (uniprot code -> entrez id) and
    ``self.gene_name_to_ensembl`` (gene name -> set of entrez ids).
    Does nothing when ``self.proteins`` already holds entries.
    """
    if self.proteins:
        # Maps were already built (or filled by populate_proteins).
        return
    print("Generating protein maps...")
    for entry in models.Protein.objects.all():
        self.proteins[entry.entrez] = entry
        self.uniprot_to_ensembl[entry.uniprot_code] = entry.entrez
        self.gene_name_to_ensembl[entry.gene].add(entry.entrez)
        # A protein_name -> entrez map is currently disabled:
        # self.protein_name_to_ensembl[entry.protein_name] = entry.entrez
def init_drugs(self):
    """Fills ``self.drugs`` (drug id -> Drug) from the database if it is still empty.

    Does nothing when ``self.drugs`` already holds entries.
    """
    if self.drugs:
        # Map was already built (or filled by populate_drugs).
        return
    print("Generating drug map...")
    for entry in models.Drug.objects.all():
        self.drugs[entry.drug_id] = entry
# init_disorders: fills self.disorders (mondo_id -> Disorder) from the
# database, but only while the map is still empty.
# NOTE(review): the header line below carries two signatures side by side;
# this looks like side-by-side diff rendering, and the body that follows
# belongs to init_disorders(self) — confirm against the real file.
def populate_proteins() -> int: def init_disorders(self):
# Skip the query when the map already holds entries.
if len(self.disorders) == 0:
print("Generating disorder map...")
for disorder in models.Disorder.objects.all():
# Key each Disorder by its mondo_id for later lookups.
self.disorders[disorder.mondo_id]=disorder
def populate_proteins(self) -> int:
""" Populates the Protein table in the django database. """ Populates the Protein table in the django database.
Handles loading the data and passing it to the django database Handles loading the data and passing it to the django database
...@@ -12,18 +41,20 @@ class DataPopulator: ...@@ -12,18 +41,20 @@ class DataPopulator:
int: Count of how many proteins were added int: Count of how many proteins were added
""" """
df = DataLoader.load_proteins() df = DataLoader.load_proteins()
proteins = list()
for _, row in df.iterrows(): for _, row in df.iterrows():
proteins.append(models.Protein( self.proteins[row['entrez_id']] = models.Protein(
uniprot_code=row['protein_ac'], uniprot_code=row['protein_ac'],
gene=row['gene_name'], gene=row['gene_name'],
entrez=row['entrez_id'], entrez=row['entrez_id'],
protein_name=row['protein_name']) protein_name=row['protein_name'])
) self.uniprot_to_ensembl[row['protein_ac']] = row['entrez_id']
models.Protein.objects.bulk_create(proteins) self.gene_name_to_ensembl[row['gene_name']].add(row['entrez_id'])
return len(proteins) # self.protein_name_to_ensembl[row['protein_name']] = row['entrez_id']
models.Protein.objects.bulk_create(self.proteins.values())
return len(self.proteins)
def populate_disorders() -> int: def populate_disorders(self) -> int:
""" Populates the Disorder table in the django database. """ Populates the Disorder table in the django database.
Handles loading the data and passing it to the django database Handles loading the data and passing it to the django database
...@@ -31,17 +62,63 @@ class DataPopulator: ...@@ -31,17 +62,63 @@ class DataPopulator:
int: Count of how many disorders were added int: Count of how many disorders were added
""" """
df = DataLoader.load_disorders() df = DataLoader.load_disorders()
bulk = list()
for _, row in df.iterrows(): for _, row in df.iterrows():
bulk.append(models.Disorder( self.disorders[row['mondo_id']] = models.Disorder(
mondo_id=row['mondo_id'], mondo_id=row['mondo_id'],
label=row['label'], label=row['label'],
icd10=row['icd10'] icd10=row['icd10']
)) )
models.Disorder.objects.bulk_create(bulk) models.Disorder.objects.bulk_create(self.disorders.values())
return len(self.disorders)
def populate_drugs(self):
    """ Populates the Drug table in the django database.
    Handles loading the data and passing it to the django database

    Every row of the drug file becomes a Drug keyed by its drug_id in
    ``self.drugs``; a later row with the same drug_id replaces the earlier
    one. All values of ``self.drugs`` are then written with one bulk_create.

    Returns:
        int: Number of entries held in ``self.drugs`` afterwards
    """
    drug_table = DataLoader.load_drugs()
    for _, record in drug_table.iterrows():
        identifier = record['drug_id']
        label = record['drug_name']
        state = record['drug_status']
        self.drugs[identifier] = models.Drug(drug_id=identifier, name=label, status=state)
    models.Drug.objects.bulk_create(self.drugs.values())
    return len(self.drugs)
# populate_expessions: builds one ExpressionLevel per (tissue column, protein)
# pair from the expression file and writes them with a single bulk_create.
# NOTE(review): the method name is spelled "expessions"; the management
# command calls it with the same spelling, so a rename must touch both.
def populate_expessions(self):
# Make sure self.proteins / self.gene_name_to_ensembl are filled.
self.init_proteins()
df = DataLoader.load_expressions()
# Every column from the third one onwards is used as a tissue name.
tissues_models = dict()
for tissue_name in df.columns.values[2:]:
try:
tissue_model = models.Tissue.objects.get(name=tissue_name)
except models.Tissue.DoesNotExist:
tissue_model = models.Tissue.objects.create(name=tissue_name)
tissues_models[tissue_name] = tissue_model
# NOTE(review): proteins_linked is incremented below but never read in this
# method — presumably a leftover from the older implementation.
proteins_linked = 0
# Keys of the (tissue, protein) pairs already queued, to avoid duplicates.
unique = set()
bulk = list()
for _, row in df.iterrows():
gene_name = row['Description']
# gene_name_to_ensembl is a defaultdict, so an unknown gene yields an empty
# set here (and adds that empty entry to the map).
for protein_id in self.gene_name_to_ensembl[gene_name]:
protein_model = self.proteins[protein_id]
proteins_linked += 1
for tissue_name, tissue_model in tissues_models.items():
# NOTE(review): `id` shadows the Python builtin of the same name.
id = f"{tissue_name}_{protein_id}"
# The first row seen for a (tissue, protein) pair wins; later ones are skipped.
if id in unique:
continue
unique.add(id)
bulk.append(models.ExpressionLevel(protein=protein_model,
tissue=tissue_model,
expression_level=row[tissue_name]))
models.ExpressionLevel.objects.bulk_create(bulk)
# NOTE(review): the line below shows the same statement twice; this looks like
# side-by-side diff rendering of an unchanged line. The method returns the
# number of ExpressionLevel objects queued in `bulk`.
return len(bulk) return len(bulk)
def populate_ensg() -> int: def populate_ensg(self) -> int:
""" Populates the Ensembl-Gene table in the django database. """ Populates the Ensembl-Gene table in the django database.
Also maps the added ensg entries to the corresponding proteins. Also maps the added ensg entries to the corresponding proteins.
Handles loading the data and passing it to the django database Handles loading the data and passing it to the django database
...@@ -49,22 +126,24 @@ class DataPopulator: ...@@ -49,22 +126,24 @@ class DataPopulator:
Returns: Returns:
int: Count of how many ensg-protein relations were added int: Count of how many ensg-protein relations were added
""" """
self.init_proteins()
data = DataLoader.load_ensg() data = DataLoader.load_ensg()
bulk = list() bulk = list()
for entrez, ensg_list in data.items(): for entrez, ensg_list in data.items():
protein = models.Protein.objects.get(entrez=entrez) protein = self.proteins[entrez]
for ensg in ensg_list: for ensg in ensg_list:
bulk.append(models.EnsemblGene(name=ensg, protein=protein)) bulk.append(models.EnsemblGene(name=ensg, protein=protein))
models.EnsemblGene.objects.bulk_create(bulk) models.EnsemblGene.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_ppi_string() -> int: def populate_ppi_string(self) -> int:
""" Populates the Protein-Protein-Interactions from STRINGdb """ Populates the Protein-Protein-Interactions from STRINGdb
Handles loading the data and passing it to the django database Handles loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
df = DataLoader.load_ppi_string() df = DataLoader.load_ppi_string()
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='STRING', name='STRING',
...@@ -75,9 +154,9 @@ class DataPopulator: ...@@ -75,9 +154,9 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching proteins # try fetching proteins
protein_a = models.Protein.objects.get(entrez=row['entrez_a']) protein_a = self.proteins[row['entrez_a']]
protein_b = models.Protein.objects.get(entrez=row['entrez_b']) protein_b = self.proteins[row['entrez_b']]
except models.Protein.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
try: try:
...@@ -92,13 +171,14 @@ class DataPopulator: ...@@ -92,13 +171,14 @@ class DataPopulator:
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_ppi_apid() -> int: def populate_ppi_apid(self) -> int:
""" Populates the Protein-Protein-Interactions from Apid """ Populates the Protein-Protein-Interactions from Apid
Handles loading the data and passing it to the django database Handles loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
df = DataLoader.load_ppi_apid() df = DataLoader.load_ppi_apid()
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='APID', name='APID',
...@@ -109,9 +189,9 @@ class DataPopulator: ...@@ -109,9 +189,9 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching proteins # try fetching proteins
protein_a = models.Protein.objects.get(uniprot_code=row['from_protein_ac']) protein_a = self.proteins[self.uniprot_to_ensembl[row['from_protein_ac']]]
protein_b = models.Protein.objects.get(uniprot_code=row['to_protein_ac']) protein_b = self.proteins[self.uniprot_to_ensembl[row['to_protein_ac']]]
except models.Protein.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
try: try:
...@@ -121,18 +201,18 @@ class DataPopulator: ...@@ -121,18 +201,18 @@ class DataPopulator:
to_protein=protein_b to_protein=protein_b
)) ))
except models.ValidationError: except models.ValidationError:
# duplicate
continue continue
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_ppi_biogrid() -> int: def populate_ppi_biogrid(self) -> int:
""" Populates the Protein-Protein-Interactions from BioGRID """ Populates the Protein-Protein-Interactions from BioGRID
Handles loading the data and passing it to the django database Handles loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
df = DataLoader.load_ppi_biogrid() df = DataLoader.load_ppi_biogrid()
dataset, _ = models.PPIDataset.objects.get_or_create( dataset, _ = models.PPIDataset.objects.get_or_create(
name='BioGRID', name='BioGRID',
...@@ -143,9 +223,10 @@ class DataPopulator: ...@@ -143,9 +223,10 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching proteins # try fetching proteins
protein_a = models.Protein.objects.get(entrez=row['entrez_a']) protein_a = self.proteins[row['entrez_a']]
protein_b = models.Protein.objects.get(entrez=row['entrez_b']) protein_b = self.proteins[row['entrez_b']]
except models.Protein.DoesNotExist: except KeyError:
# TODO update error
# continue if not found # continue if not found
continue continue
try: try:
...@@ -160,13 +241,15 @@ class DataPopulator: ...@@ -160,13 +241,15 @@ class DataPopulator:
models.ProteinProteinInteraction.objects.bulk_create(bulk) models.ProteinProteinInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_pdi_chembl() -> int: def populate_pdi_chembl(self) -> int:
""" Populates the Protein-Drug-Interactions from Chembl """ Populates the Protein-Drug-Interactions from Chembl
Handles Loading the data and passing it to the django database Handles Loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
self.init_drugs()
df = DataLoader.load_pdi_chembl() df = DataLoader.load_pdi_chembl()
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='ChEMBL', name='ChEMBL',
...@@ -176,15 +259,14 @@ class DataPopulator: ...@@ -176,15 +259,14 @@ class DataPopulator:
bulk = list() bulk = list()
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein protein = self.proteins[self.uniprot_to_ensembl[row['protein_ac']]]
protein = models.Protein.objects.get(uniprot_code=row['protein_ac']) except KeyError:
except models.Protein.DoesNotExist:
# continue if not found # continue if not found
continue continue
try: try:
# try fetching drug # try fetching drug
drug = models.Drug.objects.get(drug_id=row['drug_id']) drug = self.drugs[row['drug_id']]
except models.Drug.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
bulk.append(models.ProteinDrugInteraction( bulk.append(models.ProteinDrugInteraction(
...@@ -195,13 +277,15 @@ class DataPopulator: ...@@ -195,13 +277,15 @@ class DataPopulator:
models.ProteinDrugInteraction.objects.bulk_create(bulk) models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_pdis_disgenet() -> int: def populate_pdis_disgenet(self,) -> int:
""" Populates the Protein-Disorder-Interactions from DisGeNET """ Populates the Protein-Disorder-Interactions from DisGeNET
Handles Loading the data and passing it to the django database Handles Loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
self.init_disorders()
df = DataLoader.load_pdis_disgenet() df = DataLoader.load_pdis_disgenet()
dataset, _ = models.PDisDataset.objects.get_or_create( dataset, _ = models.PDisDataset.objects.get_or_create(
name='DisGeNET', name='DisGeNET',
...@@ -212,14 +296,14 @@ class DataPopulator: ...@@ -212,14 +296,14 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein # try fetching protein
protein = models.Protein.objects.get(uniprot_code=row['protein_name']) protein = self.proteins[self.uniprot_to_ensembl[row['protein_name']]]
except models.Protein.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
try: try:
# try fetching drug # try fetching drug
disorder = models.Disorder.objects.get(mondo_id=row['disorder_name']) disorder = self.disorders[str(int(row['disorder_name']))]
except models.Disorder.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
bulk.append(models.ProteinDisorderAssociation( bulk.append(models.ProteinDisorderAssociation(
...@@ -231,13 +315,15 @@ class DataPopulator: ...@@ -231,13 +315,15 @@ class DataPopulator:
models.ProteinDisorderAssociation.objects.bulk_create(bulk) models.ProteinDisorderAssociation.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_drdis_drugbank() -> int: def populate_drdis_drugbank(self) -> int:
""" Populates the Drug-Disorder-Indications from DrugBank """ Populates the Drug-Disorder-Indications from DrugBank
Handles Loading the data and passing it to the django database Handles Loading the data and passing it to the django database
Returns: Returns:
int: Count of how many edges were added int: Count of how many edges were added
""" """
self.init_drugs()
self.init_disorders()
df = DataLoader.load_drdis_drugbank() df = DataLoader.load_drdis_drugbank()
dataset, _ = models.DrDiDataset.objects.get_or_create( dataset, _ = models.DrDiDataset.objects.get_or_create(
name='DrugBank', name='DrugBank',
...@@ -248,14 +334,14 @@ class DataPopulator: ...@@ -248,14 +334,14 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein # try fetching protein
drug = models.Drug.objects.get(drug_id=row['drugbank_id']) drug = self.drugs[row['drugbank_id']]
except models.Drug.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
try: try:
# try fetching drug # try fetching drug
disorder = models.Disorder.objects.get(mondo_id=row['mondo_id']) disorder = self.disorders[str(int(row['mondo_id']))]
except models.Disorder.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
bulk.append(models.DrugDisorderIndication( bulk.append(models.DrugDisorderIndication(
...@@ -266,13 +352,15 @@ class DataPopulator: ...@@ -266,13 +352,15 @@ class DataPopulator:
models.DrugDisorderIndication.objects.bulk_create(bulk) models.DrugDisorderIndication.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_pdi_dgidb() -> int: def populate_pdi_dgidb(self) -> int:
""" Populates the Protein-Drug-Interactions from DGIdb """ Populates the Protein-Drug-Interactions from DGIdb
Handles Loading the data and passing it to the django database Handles Loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
self.init_drugs()
df = DataLoader.load_pdi_dgidb() df = DataLoader.load_pdi_dgidb()
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='DGIdb', name='DGIdb',
...@@ -283,14 +371,14 @@ class DataPopulator: ...@@ -283,14 +371,14 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein # try fetching protein
protein = models.Protein.objects.get(entrez=row['entrez_id']) protein = self.proteins[row['entrez_id']]
except models.Protein.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
try: try:
# try fetching drug # try fetching drug
drug = models.Drug.objects.get(drug_id=row['drug_id']) drug = self.drugs[row['drug_id']]
except models.Drug.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
bulk.append(models.ProteinDrugInteraction( bulk.append(models.ProteinDrugInteraction(
...@@ -301,13 +389,15 @@ class DataPopulator: ...@@ -301,13 +389,15 @@ class DataPopulator:
models.ProteinDrugInteraction.objects.bulk_create(bulk) models.ProteinDrugInteraction.objects.bulk_create(bulk)
return len(bulk) return len(bulk)
def populate_pdi_drugbank() -> int: def populate_pdi_drugbank(self) -> int:
""" Populates the Protein-Drug-Interactions from Drugbank """ Populates the Protein-Drug-Interactions from Drugbank
Handles Loading the data and passing it to the django database Handles Loading the data and passing it to the django database
Returns: Returns:
int: Count of how many interactions were added int: Count of how many interactions were added
""" """
self.init_proteins()
self.init_drugs()
df = DataLoader.load_pdi_drugbank() df = DataLoader.load_pdi_drugbank()
dataset, _ = models.PDIDataset.objects.get_or_create( dataset, _ = models.PDIDataset.objects.get_or_create(
name='DrugBank', name='DrugBank',
...@@ -318,14 +408,14 @@ class DataPopulator: ...@@ -318,14 +408,14 @@ class DataPopulator:
for _, row in df.iterrows(): for _, row in df.iterrows():
try: try:
# try fetching protein # try fetching protein
protein = models.Protein.objects.get(entrez=row['entrez_id']) protein = self.proteins[row['entrez_id']]
except models.Protein.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
try: try:
# try fetching drug # try fetching drug
drug = models.Drug.objects.get(drug_id=row['drug_id']) drug = self.drugs[row['drug_id']]
except models.Drug.DoesNotExist: except KeyError:
# continue if not found # continue if not found
continue continue
bulk.append(models.ProteinDrugInteraction( bulk.append(models.ProteinDrugInteraction(
......
...@@ -78,6 +78,7 @@ class ProteinDrugInteractionView(APIView): ...@@ -78,6 +78,7 @@ class ProteinDrugInteractionView(APIView):
def get(self, request) -> Response: def get(self, request) -> Response:
if request.query_params.get('proteins'): if request.query_params.get('proteins'):
print("getting drugs for proteins")
protein_ac_list = json.loads(request.query_params.get('proteins')) protein_ac_list = json.loads(request.query_params.get('proteins'))
proteins = list(Protein.objects.filter(uniprot_code__in=protein_ac_list).all()) proteins = list(Protein.objects.filter(uniprot_code__in=protein_ac_list).all())
else: else:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment