Skip to content
Snippets Groups Projects
Commit 222231a0 authored by AndiMajore's avatar AndiMajore
Browse files

removed module file

Former-commit-id: 48c08798baa9f29a97de21370e064cfbefe83f84 [formerly a147e273d739122a722a4a9a0f78bcdb607f3642]
Former-commit-id: ed0cb6561aa56c8512bcb53a81e309e872037b2b
parent e70bba60
No related branches found
No related tags found
No related merge requests found
[submodule "python_nedrex"]
path = python_nedrex
url = git@github.com:repotrial/python_nedrex.git
...@@ -12,7 +12,7 @@ services: ...@@ -12,7 +12,7 @@ services:
- 'docker-django.env.dev' - 'docker-django.env.dev'
restart: always restart: always
# volumes: # volumes:
# - ./:/usr/src/drugstone/ # - drugstone_backend_volume:/usr/src/drugstone/
ports: ports:
- 8001:8000 - 8001:8000
networks: networks:
...@@ -97,4 +97,5 @@ networks: ...@@ -97,4 +97,5 @@ networks:
drugstone_net: drugstone_net:
volumes: volumes:
drugstone_postgres_volume: drugstone_postgres_volume:
\ No newline at end of file drugstone_backend_volume:
\ No newline at end of file
from collections import defaultdict from collections import defaultdict
import python_nedrex as nedrex
from python_nedrex.core import get_nodes, get_edges, get_api_key
from drugstone import models
def iter_node_collection(coll_name, eval):
    """Feed every node of a collection to a callback, one page at a time.

    Pages of at most 10000 nodes are requested through ``get_nodes`` with a
    growing offset; iteration stops as soon as a request returns a falsy
    (e.g. empty) result.

    Args:
        coll_name: Collection name forwarded to ``get_nodes``.
        eval: Callable invoked once per node; its return value is ignored.
            Note that the name shadows the builtin ``eval`` in this scope.
    """
    page_size = 10000
    start = 0
    page = get_nodes(coll_name, offset=start, limit=page_size)
    while page:
        for entry in page:
            eval(entry)
        start += page_size
        page = get_nodes(coll_name, offset=start, limit=page_size)
def iter_edge_collection(coll_name, eval):
    """Feed every edge of a collection to a callback, one page at a time.

    Pages of at most 10000 edges are requested through ``get_edges`` with a
    growing offset; iteration stops as soon as a request returns a falsy
    (e.g. empty) result.

    Args:
        coll_name: Collection name forwarded to ``get_edges``.
        eval: Callable invoked once per edge; its return value is ignored.
            Note that the name shadows the builtin ``eval`` in this scope.
    """
    page_size = 10000
    start = 0
    page = get_edges(coll_name, offset=start, limit=page_size)
    while page:
        for entry in page:
            eval(entry)
        start += page_size
        page = get_edges(coll_name, offset=start, limit=page_size)
def identify_updates(new_list, old_list):
    """Split freshly imported entries into updates and creations.

    Args:
        new_list: Mapping of identifier -> freshly imported object.
        old_list: Mapping of identifier -> already known object. Its values
            must provide ``update(other)``; entries that differ from their
            counterpart in ``new_list`` are updated in place.

    Returns:
        A tuple ``(updates, creates)``. ``updates`` lists the objects of
        ``old_list`` that differed from the new data (after ``update`` was
        applied to them); ``creates`` lists the objects of ``new_list`` whose
        identifier is not present in ``old_list``.
    """
    updates = []
    creates = []
    for key, candidate in new_list.items():
        if key not in old_list:
            # Collect the object, not its key: callers pass this list to
            # bulk_create and read attributes from its items.
            creates.append(candidate)
            continue
        known = old_list[key]
        if candidate != known:
            known.update(candidate)
            updates.append(known)
    return updates, creates
class nedrex_importer:
def format_list(l):
    """Render a sequence as its ``str()`` form without the outer brackets.

    Args:
        l: A sized value (typically a list) or ``None``.

    Returns:
        ``str(l)`` with its first and last character removed, e.g.
        ``['a', 'b']`` becomes ``"'a', 'b'"``; the empty string when ``l`` is
        ``None`` or has length 0.
    """
    if l is None or len(l) == 0:
        return ""
    rendered = str(l)
    return rendered[1:-1]
class nedrex_importer:
proteins = dict() proteins = dict()
entrez_to_uniprot = dict()
gene_name_to_uniprot = defaultdict(lambda: set())
disorders = dict()
drugs = dict()
def __init__(self, base_url):
    """Configure the ``python_nedrex`` client for this importer.

    Sets ``base_url`` as the client's URL base, then requests an API key
    (with ``accept_eula=True``) and registers it with the client.

    Args:
        base_url: URL base handed to ``nedrex.config.set_url_base``.
    """
    nedrex.config.set_url_base(base_url)
    nedrex.config.set_api_key(get_api_key(accept_eula=True))
def init_proteins(self):
    """Load the proteins already stored in the database into the lookup maps.

    Fills ``self.proteins``, ``self.entrez_to_uniprot`` and
    ``self.gene_name_to_uniprot`` from ``models.Protein``. Does nothing when
    ``self.proteins`` is already populated.
    """
    if self.proteins:
        return
    print("Generating protein maps...")
    for protein in models.Protein.objects.all():
        # Keyed by UniProt code (previously entrez): import_proteins builds
        # its comparison dict from UniProt codes and also adds newly created
        # proteins under protein.uniprot_code, so the keys have to match for
        # existing rows to be recognised.
        self.proteins[protein.uniprot_code] = protein
        self.entrez_to_uniprot[protein.entrez] = protein.uniprot_code
        self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
def init_drugs(self):
    """Load the drugs already stored in the database into ``self.drugs``.

    The map is keyed by ``drug_id``. Does nothing when ``self.drugs`` is
    already populated.
    """
    if self.drugs:
        return
    print("Generating drug map...")
    self.drugs.update((entry.drug_id, entry) for entry in models.Drug.objects.all())
def import_proteins(self): def init_disorders(self):
import python_nedrex as nedrex if len(self.disorders) == 0:
from python_nedrex.core import get_nodes, get_api_key, get_edges print("Generating disorder map...")
from drugstone.models import Protein for disorder in models.Disorder.objects.all():
self.disorders[disorder.mondo_id] = disorder
def import_proteins(self, update: bool):
proteins = dict()
gene_to_prots = defaultdict(lambda: set()) gene_to_prots = defaultdict(lambda: set())
def iter_node_collection(coll_name, eval): if update:
offset = 0 self.init_proteins()
limit = 10000
while True:
result = get_nodes(coll_name, offset=offset, limit=limit)
if not result:
return
for node in result:
eval(node)
offset += limit
def iter_edge_collection(coll_name, eval):
offset = 0
limit = 10000
while True:
result = get_edges(coll_name, offset=offset, limit=limit)
if not result:
return
for edge in result:
eval(edge)
offset += limit
def add_protein(node): def add_protein(node):
id = node['primaryDomainId'] print(node)
self.proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName']) id = node['primaryDomainId'].split('.')[1]
name = node['geneName']
if len(node['synonyms']) > 0:
name = node['synonyms'][0]
idx = name.index('{')
if idx > 0:
name = name[idx - 1:]
proteins[id] = models.Protein(uniprot_code=id, name=name, gene=node['geneName'])
def add_edges(edge): def add_edges(edge):
id = edge['sourceDomainId'] id = edge['sourceDomainId'].split('.')[1]
protein = self.proteins[id] protein = proteins[id]
protein.entrez = edge['targetDomainId'].split('.')[1] protein.entrez = edge['targetDomainId'].split('.')[1]
gene_to_prots[edge['targetDomainId']].add(id) gene_to_prots[edge['targetDomainId']].add(id)
def add_genes(node): def add_genes(node):
id = node['primaryDomainId'] id = node['primaryDomainId'].split('.')[1]
for prot_id in gene_to_prots[id]: for prot_id in gene_to_prots[id]:
protein = self.proteins[prot_id] protein = proteins[prot_id]
try: try:
protein.protein_name = node['synonyms'][0] protein.protein_name = node['synonyms'][0]
except: except:
pass pass
nedrex.config.set_url_base("http://82.148.225.92:8123/")
api_key = get_api_key(accept_eula=True)
nedrex.config.set_api_key(api_key)
print('Importing Proteins')
iter_node_collection('protein', add_protein) iter_node_collection('protein', add_protein)
print('Importing Protein-Gene mapping')
iter_edge_collection('protein_encoded_by_gene', add_edges) iter_edge_collection('protein_encoded_by_gene', add_edges)
print('Mapping Gene information')
iter_node_collection('gene', add_genes) iter_node_collection('gene', add_genes)
Protein.objects.bulk_create(self.proteins.values()) # TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.proteins)
models.Protein.objects.bulk_update(updates)
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.proteins[protein.uniprot_code] = protein
else:
models.Protein.objects.bulk_create(self.proteins.values())
self.proteins = proteins
return len(self.proteins) return len(self.proteins)
def import_drugs(self, update):
    """Import all nodes of the NeDRex ``drug`` collection into ``models.Drug``.

    Args:
        update: When true, the fetched drugs are compared with the drugs
            already in the database (loaded via ``init_drugs``): changed rows
            are updated and missing rows are created. When false, every
            fetched drug is inserted.

    Returns:
        Number of entries in ``self.drugs`` after the import.
    """
    drugs = dict()
    if update:
        self.init_drugs()

    def add_drug(node):
        # primaryDomainId is split on '.' and the second part is used as id.
        drug_id = node['primaryDomainId'].split('.')[1]
        drugs[drug_id] = models.Drug(drug_id=drug_id, name=node['displayName'], status=format_list(node['drugGroups']))

    iter_node_collection('drug', add_drug)

    # TODO test updating ideas
    if update:
        updates, creates = identify_updates(drugs, self.drugs)
        # Accept both keys and objects in `creates`, so this method does not
        # depend on which of the two the helper hands back.
        creates = [entry if isinstance(entry, models.Drug) else drugs[entry] for entry in creates]
        # bulk_update requires the list of fields to write; only the columns
        # supplied by the import are written back.
        models.Drug.objects.bulk_update(updates, ['name', 'status'])
        models.Drug.objects.bulk_create(creates)
        for drug in creates:
            self.drugs[drug.drug_id] = drug
    else:
        # Insert what was just fetched (previously the stale self.drugs map
        # was inserted, which is empty on a fresh importer).
        models.Drug.objects.bulk_create(drugs.values())
        self.drugs = drugs
    return len(self.drugs)
def import_disorders(self, update):
    """Import all nodes of the NeDRex ``disorder`` collection into ``models.Disorder``.

    Args:
        update: When true, the fetched disorders are compared with the
            disorders already in the database (loaded via
            ``init_disorders``): changed rows are updated and missing rows
            are created. When false, every fetched disorder is inserted.

    Returns:
        Number of entries in ``self.disorders`` after the import.
    """
    disorders = dict()
    if update:
        self.init_disorders()

    def add_disorder(node):
        # primaryDomainId is split on '.' and the second part is used as id.
        mondo_id = node['primaryDomainId'].split('.')[1]
        # Collect into the local dict (previously self.disorders), so that
        # the comparison below sees the fetched data and the final map is
        # not replaced by an empty dict.
        disorders[mondo_id] = models.Disorder(mondo_id=mondo_id, label=node['displayName'], icd10=format_list(node['icd10']))

    iter_node_collection('disorder', add_disorder)

    # TODO test updating ideas
    if update:
        updates, creates = identify_updates(disorders, self.disorders)
        # Accept both keys and objects in `creates`, so this method does not
        # depend on which of the two the helper hands back.
        creates = [entry if isinstance(entry, models.Disorder) else disorders[entry] for entry in creates]
        # bulk_update requires the list of fields to write.
        models.Disorder.objects.bulk_update(updates, ['label', 'icd10'])
        models.Disorder.objects.bulk_create(creates)
        for disorder in creates:
            # Disorders are keyed by mondo_id; they have no uniprot_code.
            self.disorders[disorder.mondo_id] = disorder
    else:
        models.Disorder.objects.bulk_create(disorders.values())
        self.disorders = disorders
    return len(self.disorders)
...@@ -99,7 +99,7 @@ class Command(BaseCommand): ...@@ -99,7 +99,7 @@ class Command(BaseCommand):
tissue_expression_file=exp_file, tissue_expression_file=exp_file,
) )
importer = nedrex_importer() importer = nedrex_importer("http://82.148.225.92:8123/")
if kwargs['delete_model'] is not None: if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',') model_list = kwargs['delete_model'].split(',')
...@@ -111,6 +111,7 @@ class Command(BaseCommand): ...@@ -111,6 +111,7 @@ class Command(BaseCommand):
if kwargs['drug_file'] is not None: if kwargs['drug_file'] is not None:
print('Populating Drugs...') print('Populating Drugs...')
n = DataPopulator.populate_drugs(populator) n = DataPopulator.populate_drugs(populator)
# n = nedrex_importer.import_drugs(importer,False)
print(f'Populated {n} Drugs.') print(f'Populated {n} Drugs.')
# if kwargs['protein_file'] is not None: # if kwargs['protein_file'] is not None:
...@@ -130,8 +131,8 @@ class Command(BaseCommand): ...@@ -130,8 +131,8 @@ class Command(BaseCommand):
if kwargs['proteins'] is not None: if kwargs['proteins'] is not None:
print('Populating Proteins...') print('Populating Proteins...')
n = nedrex_importer.import_proteins(nedrex_importer) # n = nedrex_importer.import_proteins(importer, False)
# n = DataPopulator.populate_proteins(populator) n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.') print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...') print('Populating ENSG IDs...')
...@@ -140,6 +141,7 @@ class Command(BaseCommand): ...@@ -140,6 +141,7 @@ class Command(BaseCommand):
if kwargs['disorders'] is not None: if kwargs['disorders'] is not None:
print('Populating Disorders...') print('Populating Disorders...')
# n = nedrex_importer.import_disorders(importer, False)
n = DataPopulator.populate_disorders(populator) n = DataPopulator.populate_disorders(populator)
print(f'Populated {n} Disorders.') print(f'Populated {n} Disorders.')
......
...@@ -8,7 +8,6 @@ class DataPopulator: ...@@ -8,7 +8,6 @@ class DataPopulator:
proteins = dict() proteins = dict()
uniprot_to_ensembl = dict() uniprot_to_ensembl = dict()
gene_name_to_ensembl = defaultdict(lambda: set()) gene_name_to_ensembl = defaultdict(lambda: set())
# protein_name_to_ensembl = dict()
disorders = dict() disorders = dict()
drugs = dict() drugs = dict()
...@@ -19,7 +18,6 @@ class DataPopulator: ...@@ -19,7 +18,6 @@ class DataPopulator:
self.proteins[protein.entrez]=protein self.proteins[protein.entrez]=protein
self.uniprot_to_ensembl[protein.uniprot_code] = protein.entrez self.uniprot_to_ensembl[protein.uniprot_code] = protein.entrez
self.gene_name_to_ensembl[protein.gene].add(protein.entrez) self.gene_name_to_ensembl[protein.gene].add(protein.entrez)
# self.protein_name_to_ensembl[protein.protein_name] = protein.entrez
def init_drugs(self): def init_drugs(self):
if len(self.drugs)== 0: if len(self.drugs)== 0:
...@@ -49,7 +47,6 @@ class DataPopulator: ...@@ -49,7 +47,6 @@ class DataPopulator:
protein_name=row['protein_name']) protein_name=row['protein_name'])
self.uniprot_to_ensembl[row['protein_ac']] = row['entrez_id'] self.uniprot_to_ensembl[row['protein_ac']] = row['entrez_id']
self.gene_name_to_ensembl[row['gene_name']].add(row['entrez_id']) self.gene_name_to_ensembl[row['gene_name']].add(row['entrez_id'])
# self.protein_name_to_ensembl[row['protein_name']] = row['entrez_id']
models.Protein.objects.bulk_create(self.proteins.values()) models.Protein.objects.bulk_create(self.proteins.values())
return len(self.proteins) return len(self.proteins)
......
...@@ -79,7 +79,7 @@ class Protein(models.Model): ...@@ -79,7 +79,7 @@ class Protein(models.Model):
# are either 6 or 10 characters long # are either 6 or 10 characters long
uniprot_code = models.CharField(max_length=10) uniprot_code = models.CharField(max_length=10)
gene = models.CharField(max_length=128, default='') # symbol gene = models.CharField(max_length=128, default='') # symbol
protein_name = models.CharField(max_length=128, default='') protein_name = models.CharField(max_length=128, default='')
entrez = models.CharField(max_length=128, default='') entrez = models.CharField(max_length=128, default='')
drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction', drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction',
...@@ -93,10 +93,21 @@ class Protein(models.Model): ...@@ -93,10 +93,21 @@ class Protein(models.Model):
def __str__(self): def __str__(self):
return self.gene return self.gene
def __eq__(self, other):
    """Compare two proteins by uniprot_code, gene, protein_name and entrez.

    Returns ``NotImplemented`` for operands that are not ``Protein``
    instances, so comparisons with unrelated objects (e.g. ``None``) fall
    back to Python's default handling instead of raising AttributeError.
    """
    if not isinstance(other, Protein):
        return NotImplemented
    return (
        self.uniprot_code == other.uniprot_code
        and self.gene == other.gene
        and self.protein_name == other.protein_name
        and self.entrez == other.entrez
    )

# Defining __eq__ in a class body sets __hash__ to None unless it is assigned
# explicitly; keep the hash implementation inherited from models.Model so
# instances stay usable in sets and as dict keys.
__hash__ = models.Model.__hash__

def __ne__(self, other):
    """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        return outcome
    return not outcome
def update(self, other):
    """Overwrite this protein's data fields with the values of ``other``.

    Copies ``uniprot_code``, ``gene``, ``protein_name`` and ``entrez``, in
    that order; nothing else is touched and nothing is saved.
    """
    for field_name in ('uniprot_code', 'gene', 'protein_name', 'entrez'):
        setattr(self, field_name, getattr(other, field_name))
class Disorder(models.Model): class Disorder(models.Model):
mondo_id = models.CharField(max_length=7) mondo_id = models.CharField(max_length=7)
label = models.CharField(max_length=256, default='') # symbol label = models.CharField(max_length=256, default='') # symbol
icd10 = models.CharField(max_length=128, default='') icd10 = models.CharField(max_length=128, default='')
proteins = models.ManyToManyField( proteins = models.ManyToManyField(
'Protein', through='ProteinDisorderAssociation', related_name='associated_proteins') 'Protein', through='ProteinDisorderAssociation', related_name='associated_proteins')
...@@ -107,6 +118,40 @@ class Disorder(models.Model): ...@@ -107,6 +118,40 @@ class Disorder(models.Model):
def __str__(self): def __str__(self):
return self.label return self.label
def __eq__(self, other):
    """Compare two disorders by mondo_id, label and icd10.

    Returns ``NotImplemented`` for operands that are not ``Disorder``
    instances, so comparisons with unrelated objects (e.g. ``None``) fall
    back to Python's default handling instead of raising AttributeError.
    """
    if not isinstance(other, Disorder):
        return NotImplemented
    return (
        self.mondo_id == other.mondo_id
        and self.label == other.label
        and self.icd10 == other.icd10
    )

# Defining __eq__ in a class body sets __hash__ to None unless it is assigned
# explicitly; keep the hash implementation inherited from models.Model so
# instances stay usable in sets and as dict keys.
__hash__ = models.Model.__hash__

def __ne__(self, other):
    """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        return outcome
    return not outcome
def update(self, other):
    """Overwrite this disorder's data fields with the values of ``other``.

    Copies ``mondo_id``, ``label`` and ``icd10``, in that order; nothing else
    is touched and nothing is saved.
    """
    for field_name in ('mondo_id', 'label', 'icd10'):
        setattr(self, field_name, getattr(other, field_name))
class Drug(models.Model):
    """Database model for a drug, identified by its unique ``drug_id``.

    Equality is value based: two drugs are equal when ``drug_id``, ``name``
    and ``status`` match (``links`` is not part of the comparison).
    """

    drug_id = models.CharField(max_length=10, unique=True)
    name = models.CharField(max_length=256, default='')
    status = models.CharField(max_length=128, default='')
    # in_trial = models.BooleanField(default=False)
    # in_literature = models.BooleanField(default=False)
    links = models.CharField(max_length=16 * 1024, default='')

    def __str__(self):
        return self.drug_id

    def __eq__(self, other):
        """Compare two drugs by drug_id, name and status.

        Returns ``NotImplemented`` for operands that are not ``Drug``
        instances instead of raising AttributeError.
        """
        if not isinstance(other, Drug):
            return NotImplemented
        # Compare drug_id with drug_id; Drug has no uniprot_code attribute,
        # so the former comparison raised AttributeError.
        return self.drug_id == other.drug_id and self.name == other.name and self.status == other.status

    # Defining __eq__ in a class body sets __hash__ to None unless it is
    # assigned explicitly; keep the hash implementation inherited from
    # models.Model so instances stay usable in sets and as dict keys.
    __hash__ = models.Model.__hash__

    def __ne__(self, other):
        """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def update(self, other):
        """Overwrite drug_id, name, status and links with the values of ``other``.

        Only the in-memory instance is changed; nothing is saved here.
        """
        self.drug_id = other.drug_id
        self.name = other.name
        self.status = other.status
        self.links = other.links
class ProteinDisorderAssociation(models.Model): class ProteinDisorderAssociation(models.Model):
pdis_dataset = models.ForeignKey( pdis_dataset = models.ForeignKey(
...@@ -121,7 +166,6 @@ class ProteinDisorderAssociation(models.Model): ...@@ -121,7 +166,6 @@ class ProteinDisorderAssociation(models.Model):
def __str__(self): def __str__(self):
return f'{self.pdis_dataset}-{self.protein}-{self.disorder}' return f'{self.pdis_dataset}-{self.protein}-{self.disorder}'
class DrugDisorderIndication(models.Model): class DrugDisorderIndication(models.Model):
drdi_dataset = models.ForeignKey( drdi_dataset = models.ForeignKey(
'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation') 'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation')
...@@ -135,18 +179,6 @@ class DrugDisorderIndication(models.Model): ...@@ -135,18 +179,6 @@ class DrugDisorderIndication(models.Model):
return f'{self.drdi_dataset}-{self.drug}-{self.disorder}' return f'{self.drdi_dataset}-{self.drug}-{self.disorder}'
class Drug(models.Model):
drug_id = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=256, default='')
status = models.CharField(max_length=128, default='')
# in_trial = models.BooleanField(default=False)
# in_literature = models.BooleanField(default=False)
links = models.CharField(max_length=16*1024, default='')
def __str__(self):
return self.drug_id
class ProteinProteinInteraction(models.Model): class ProteinProteinInteraction(models.Model):
ppi_dataset = models.ForeignKey( ppi_dataset = models.ForeignKey(
'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation') 'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation')
...@@ -158,12 +190,12 @@ class ProteinProteinInteraction(models.Model): ...@@ -158,12 +190,12 @@ class ProteinProteinInteraction(models.Model):
from_protein=self.from_protein, from_protein=self.from_protein,
to_protein=self.to_protein, to_protein=self.to_protein,
ppi_dataset=self.ppi_dataset ppi_dataset=self.ppi_dataset
) )
p2p1_q = ProteinProteinInteraction.objects.filter( p2p1_q = ProteinProteinInteraction.objects.filter(
from_protein=self.to_protein, from_protein=self.to_protein,
to_protein=self.from_protein, to_protein=self.from_protein,
ppi_dataset=self.ppi_dataset ppi_dataset=self.ppi_dataset
) )
if p1p2_q.exists() or p2p1_q.exists(): if p1p2_q.exists() or p2p1_q.exists():
raise ValidationError('Protein-Protein interaction must be unique!') raise ValidationError('Protein-Protein interaction must be unique!')
...@@ -175,7 +207,6 @@ class ProteinProteinInteraction(models.Model): ...@@ -175,7 +207,6 @@ class ProteinProteinInteraction(models.Model):
def __str__(self): def __str__(self):
return f'{self.ppi_dataset}-{self.from_protein}-{self.to_protein}' return f'{self.ppi_dataset}-{self.from_protein}-{self.to_protein}'
class ProteinDrugInteraction(models.Model): class ProteinDrugInteraction(models.Model):
pdi_dataset = models.ForeignKey( pdi_dataset = models.ForeignKey(
'PDIDataset', null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation') 'PDIDataset', null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
...@@ -188,7 +219,6 @@ class ProteinDrugInteraction(models.Model): ...@@ -188,7 +219,6 @@ class ProteinDrugInteraction(models.Model):
def __str__(self): def __str__(self):
return f'{self.pdi_dataset}-{self.protein}-{self.drug}' return f'{self.pdi_dataset}-{self.protein}-{self.drug}'
class Task(models.Model): class Task(models.Model):
token = models.CharField(max_length=32, unique=True) token = models.CharField(max_length=32, unique=True)
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
...@@ -208,7 +238,6 @@ class Task(models.Model): ...@@ -208,7 +238,6 @@ class Task(models.Model):
result = models.TextField(null=True) result = models.TextField(null=True)
class Network(models.Model): class Network(models.Model):
id = models.CharField(primary_key=True, max_length=32, unique=True) id = models.CharField(primary_key=True, max_length=32, unique=True)
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment