Skip to content
Snippets Groups Projects
Commit 52fa9f99 authored by AndiMajore's avatar AndiMajore
Browse files

removed module file

Former-commit-id: db404c9a
parent 3a92833c
No related branches found
No related tags found
No related merge requests found
[submodule "python_nedrex"]
path = python_nedrex
url = git@github.com:repotrial/python_nedrex.git
......@@ -12,7 +12,7 @@ services:
- 'docker-django.env.dev'
restart: always
# volumes:
# - ./:/usr/src/drugstone/
# - drugstone_backend_volume:/usr/src/drugstone/
ports:
- 8001:8000
networks:
......@@ -97,4 +97,5 @@ networks:
drugstone_net:
volumes:
drugstone_postgres_volume:
\ No newline at end of file
drugstone_postgres_volume:
drugstone_backend_volume:
\ No newline at end of file
from collections import defaultdict
import python_nedrex as nedrex
from python_nedrex.core import get_nodes, get_edges, get_api_key
from drugstone import models
def iter_node_collection(coll_name, eval):
    """Feed every node of a NeDRex collection to a callback, page by page.

    Pages of 10000 nodes are requested through ``get_nodes`` with a growing
    offset until a request comes back empty.

    Args:
        coll_name: name of the node collection to read.
        eval: callable invoked once per node (the name shadows the builtin
            but is kept because it is part of the signature).
    """
    page_size = 10000
    start = 0
    while True:
        batch = get_nodes(coll_name, offset=start, limit=page_size)
        if not batch:
            # An empty page marks the end of the collection.
            break
        for entry in batch:
            eval(entry)
        start += page_size
def iter_edge_collection(coll_name, eval):
    """Feed every edge of a NeDRex collection to a callback, page by page.

    Pages of 10000 edges are requested through ``get_edges`` with a growing
    offset until a request comes back empty.

    Args:
        coll_name: name of the edge collection to read.
        eval: callable invoked once per edge (the name shadows the builtin
            but is kept because it is part of the signature).
    """
    page_size = 10000
    start = 0
    while True:
        batch = get_edges(coll_name, offset=start, limit=page_size)
        if not batch:
            # An empty page marks the end of the collection.
            break
        for entry in batch:
            eval(entry)
        start += page_size
def identify_updates(new_list, old_list):
    """Split freshly imported entries into updates and creations.

    Both arguments are mappings from an identifier to an entry object.
    Entries must support ``!=`` and an ``update(other)`` method.

    Args:
        new_list: mapping of identifier -> newly imported entry.
        old_list: mapping of identifier -> already known entry. Entries that
            differ from their new counterpart are refreshed in place.

    Returns:
        A ``(updates, creates)`` tuple of lists. ``updates`` holds the
        existing objects from ``old_list`` after they were refreshed;
        ``creates`` holds the objects from ``new_list`` whose identifier is
        not present in ``old_list``.
    """
    updates = []
    creates = []
    for key, new_entry in new_list.items():
        if key not in old_list:
            # Collect the entry itself rather than its key: callers pass this
            # list to bulk_create() and read attributes off each element.
            creates.append(new_entry)
            continue
        old_entry = old_list[key]
        if new_entry != old_entry:
            old_entry.update(new_entry)
            updates.append(old_entry)
    return updates, creates
class nedrex_importer:
def format_list(l):
    """Return the ``str()`` form of a sequence without its outer brackets.

    ``None`` and empty sequences yield an empty string. Elements keep their
    ``repr`` formatting, so strings stay quoted.
    """
    if l is None or len(l) == 0:
        return ""
    # Drop the first and last character, i.e. the enclosing brackets.
    return str(l)[1:-1]
# Importer whose import_* methods read NeDRex collections and store them as drugstone models.
class nedrex_importer:
# NOTE: the containers below are class attributes, so they are shared by all
# instances until a method rebinds the name on `self`.
# Protein model instances by identifier.
proteins = dict()
# Entrez id -> UniProt code.
entrez_to_uniprot = dict()
# Gene name -> set of UniProt codes.
gene_name_to_uniprot = defaultdict(lambda: set())
# MONDO id -> Disorder model instance.
disorders = dict()
# Drug id -> Drug model instance.
drugs = dict()
def __init__(self, base_url):
    """Configure the python_nedrex client.

    Sets ``base_url`` as the API base, then requests an API key (accepting
    the EULA) and registers it with the client configuration.
    """
    nedrex.config.set_url_base(base_url)
    nedrex.config.set_api_key(get_api_key(accept_eula=True))
def init_proteins(self):
    """Load the protein lookup maps from the database if they are empty.

    Fills ``self.proteins``, ``self.entrez_to_uniprot`` and
    ``self.gene_name_to_uniprot`` from all stored ``Protein`` rows. Does
    nothing when ``self.proteins`` already has entries.
    """
    if self.proteins:
        return
    print("Generating protein maps...")
    for protein in models.Protein.objects.all():
        # Key by UniProt code: import_proteins() keys its entries the same
        # way and both reverse maps below resolve to UniProt codes, so
        # keying by Entrez id would never match an existing entry.
        self.proteins[protein.uniprot_code] = protein
        self.entrez_to_uniprot[protein.entrez] = protein.uniprot_code
        self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
def init_drugs(self):
    """Load ``self.drugs`` (drug id -> Drug) from the database if it is empty."""
    if self.drugs:
        return
    print("Generating drug map...")
    for entry in models.Drug.objects.all():
        self.drugs[entry.drug_id] = entry
def import_proteins(self):
import python_nedrex as nedrex
from python_nedrex.core import get_nodes, get_api_key, get_edges
from drugstone.models import Protein
def init_disorders(self):
    """Load ``self.disorders`` (MONDO id -> Disorder) from the database if it is empty."""
    if self.disorders:
        return
    print("Generating disorder map...")
    for entry in models.Disorder.objects.all():
        self.disorders[entry.mondo_id] = entry
def import_proteins(self, update: bool):
proteins = dict()
gene_to_prots = defaultdict(lambda: set())
def iter_node_collection(coll_name, eval):
offset = 0
limit = 10000
while True:
result = get_nodes(coll_name, offset=offset, limit=limit)
if not result:
return
for node in result:
eval(node)
offset += limit
def iter_edge_collection(coll_name, eval):
offset = 0
limit = 10000
while True:
result = get_edges(coll_name, offset=offset, limit=limit)
if not result:
return
for edge in result:
eval(edge)
offset += limit
if update:
self.init_proteins()
def add_protein(node):
id = node['primaryDomainId']
self.proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName'])
print(node)
id = node['primaryDomainId'].split('.')[1]
name = node['geneName']
if len(node['synonyms']) > 0:
name = node['synonyms'][0]
idx = name.index('{')
if idx > 0:
name = name[idx - 1:]
proteins[id] = models.Protein(uniprot_code=id, name=name, gene=node['geneName'])
def add_edges(edge):
id = edge['sourceDomainId']
protein = self.proteins[id]
id = edge['sourceDomainId'].split('.')[1]
protein = proteins[id]
protein.entrez = edge['targetDomainId'].split('.')[1]
gene_to_prots[edge['targetDomainId']].add(id)
def add_genes(node):
id = node['primaryDomainId']
id = node['primaryDomainId'].split('.')[1]
for prot_id in gene_to_prots[id]:
protein = self.proteins[prot_id]
protein = proteins[prot_id]
try:
protein.protein_name = node['synonyms'][0]
except:
pass
nedrex.config.set_url_base("http://82.148.225.92:8123/")
api_key = get_api_key(accept_eula=True)
nedrex.config.set_api_key(api_key)
print('Importing Proteins')
iter_node_collection('protein', add_protein)
print('Importing Protein-Gene mapping')
iter_edge_collection('protein_encoded_by_gene', add_edges)
print('Mapping Gene information')
iter_node_collection('gene', add_genes)
Protein.objects.bulk_create(self.proteins.values())
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.proteins)
models.Protein.objects.bulk_update(updates)
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.proteins[protein.uniprot_code] = protein
else:
models.Protein.objects.bulk_create(self.proteins.values())
self.proteins = proteins
return len(self.proteins)
def import_drugs(self, update):
    """Import all nodes of the NeDRex ``drug`` collection as ``Drug`` rows.

    Args:
        update: when truthy, existing rows are loaded first; changed drugs
            are updated and unknown drugs are created. When falsy, every
            fetched drug is created and ``self.drugs`` is replaced by the
            fetched mapping.

    Returns:
        The number of entries in ``self.drugs`` after the import.
    """
    drugs = dict()
    if update:
        self.init_drugs()

    def add_drug(node):
        # primaryDomainId is split on '.' and the second part is used as id.
        drug_id = node['primaryDomainId'].split('.')[1]
        drugs[drug_id] = models.Drug(
            drug_id=drug_id,
            name=node['displayName'],
            status=format_list(node['drugGroups']),
        )

    iter_node_collection('drug', add_drug)

    # TODO test updating ideas
    if update:
        (updates, creates) = identify_updates(drugs, self.drugs)
        # bulk_update() requires the list of fields to write; these are the
        # fields this importer fills from NeDRex.
        models.Drug.objects.bulk_update(updates, ['name', 'status'])
        models.Drug.objects.bulk_create(creates)
        for drug in creates:
            self.drugs[drug.drug_id] = drug
    else:
        # Create the freshly fetched drugs; self.drugs is not populated in
        # this mode, so it must not be used as the source.
        models.Drug.objects.bulk_create(drugs.values())
        self.drugs = drugs
    return len(self.drugs)
def import_disorders(self, update):
    """Import all nodes of the NeDRex ``disorder`` collection as ``Disorder`` rows.

    Args:
        update: when truthy, existing rows are loaded first; changed
            disorders are updated and unknown disorders are created. When
            falsy, every fetched disorder is created and ``self.disorders``
            is replaced by the fetched mapping.

    Returns:
        The number of entries in ``self.disorders`` after the import.
    """
    disorders = dict()
    if update:
        self.init_disorders()

    def add_disorder(node):
        # primaryDomainId is split on '.' and the second part is used as id.
        mondo_id = node['primaryDomainId'].split('.')[1]
        # Collect into the local mapping so it can be compared against the
        # cached self.disorders below.
        disorders[mondo_id] = models.Disorder(
            mondo_id=mondo_id,
            label=node['displayName'],
            icd10=format_list(node['icd10']),
        )

    iter_node_collection('disorder', add_disorder)

    # TODO test updating ideas
    if update:
        (updates, creates) = identify_updates(disorders, self.disorders)
        # bulk_update() requires the list of fields to write; these are the
        # fields this importer fills from NeDRex.
        models.Disorder.objects.bulk_update(updates, ['label', 'icd10'])
        models.Disorder.objects.bulk_create(creates)
        for disorder in creates:
            # Disorder has no uniprot_code; its identifier is mondo_id.
            self.disorders[disorder.mondo_id] = disorder
    else:
        models.Disorder.objects.bulk_create(disorders.values())
        self.disorders = disorders
    return len(self.disorders)
......@@ -99,7 +99,7 @@ class Command(BaseCommand):
tissue_expression_file=exp_file,
)
importer = nedrex_importer()
importer = nedrex_importer("http://82.148.225.92:8123/")
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
......@@ -111,6 +111,7 @@ class Command(BaseCommand):
if kwargs['drug_file'] is not None:
print('Populating Drugs...')
n = DataPopulator.populate_drugs(populator)
# n = nedrex_importer.import_drugs(importer,False)
print(f'Populated {n} Drugs.')
# if kwargs['protein_file'] is not None:
......@@ -130,8 +131,8 @@ class Command(BaseCommand):
if kwargs['proteins'] is not None:
print('Populating Proteins...')
n = nedrex_importer.import_proteins(nedrex_importer)
# n = DataPopulator.populate_proteins(populator)
# n = nedrex_importer.import_proteins(importer, False)
n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
......@@ -140,6 +141,7 @@ class Command(BaseCommand):
if kwargs['disorders'] is not None:
print('Populating Disorders...')
# n = nedrex_importer.import_disorders(importer, False)
n = DataPopulator.populate_disorders(populator)
print(f'Populated {n} Disorders.')
......
......@@ -8,7 +8,6 @@ class DataPopulator:
proteins = dict()
uniprot_to_ensembl = dict()
gene_name_to_ensembl = defaultdict(lambda: set())
# protein_name_to_ensembl = dict()
disorders = dict()
drugs = dict()
......@@ -19,7 +18,6 @@ class DataPopulator:
self.proteins[protein.entrez]=protein
self.uniprot_to_ensembl[protein.uniprot_code] = protein.entrez
self.gene_name_to_ensembl[protein.gene].add(protein.entrez)
# self.protein_name_to_ensembl[protein.protein_name] = protein.entrez
def init_drugs(self):
if len(self.drugs)== 0:
......@@ -49,7 +47,6 @@ class DataPopulator:
protein_name=row['protein_name'])
self.uniprot_to_ensembl[row['protein_ac']] = row['entrez_id']
self.gene_name_to_ensembl[row['gene_name']].add(row['entrez_id'])
# self.protein_name_to_ensembl[row['protein_name']] = row['entrez_id']
models.Protein.objects.bulk_create(self.proteins.values())
return len(self.proteins)
......
......@@ -79,7 +79,7 @@ class Protein(models.Model):
# are either 6 or 10 characters long
uniprot_code = models.CharField(max_length=10)
gene = models.CharField(max_length=128, default='') # symbol
gene = models.CharField(max_length=128, default='') # symbol
protein_name = models.CharField(max_length=128, default='')
entrez = models.CharField(max_length=128, default='')
drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction',
......@@ -93,10 +93,21 @@ class Protein(models.Model):
# String form of a protein is its gene symbol.
def __str__(self):
return self.gene
# Field-wise equality on uniprot_code, gene, protein_name and entrez.
# NOTE(review): a class that defines __eq__ without __hash__ gets
# __hash__ = None in Python 3; no __hash__ is visible in this hunk - confirm.
def __eq__(self, other):
return self.uniprot_code == other.uniprot_code and self.gene == other.gene and self.protein_name == other.protein_name and self.entrez == other.entrez
# Logical negation of __eq__.
def __ne__(self, other):
return not self.__eq__(other)
# Copy uniprot_code, gene, protein_name and entrez from `other` onto this instance (in memory only; nothing is saved here).
def update(self, other):
self.uniprot_code = other.uniprot_code
self.gene = other.gene
self.protein_name = other.protein_name
self.entrez = other.entrez
class Disorder(models.Model):
mondo_id = models.CharField(max_length=7)
label = models.CharField(max_length=256, default='') # symbol
label = models.CharField(max_length=256, default='') # symbol
icd10 = models.CharField(max_length=128, default='')
proteins = models.ManyToManyField(
'Protein', through='ProteinDisorderAssociation', related_name='associated_proteins')
......@@ -107,6 +118,40 @@ class Disorder(models.Model):
# String form of a disorder is its label.
def __str__(self):
return self.label
# Field-wise equality on mondo_id, label and icd10.
# NOTE(review): a class that defines __eq__ without __hash__ gets
# __hash__ = None in Python 3; no __hash__ is visible in this hunk - confirm.
def __eq__(self, other):
return self.mondo_id == other.mondo_id and self.label == other.label and self.icd10 == other.icd10
# Logical negation of __eq__.
def __ne__(self, other):
return not self.__eq__(other)
# Copy mondo_id, label and icd10 from `other` onto this instance (in memory only; nothing is saved here).
def update(self,other):
self.mondo_id = other.mondo_id
self.label = other.label
self.icd10 = other.icd10
class Drug(models.Model):
    """A drug, identified by its unique ``drug_id``."""

    drug_id = models.CharField(max_length=10, unique=True)
    name = models.CharField(max_length=256, default='')
    status = models.CharField(max_length=128, default='')
    # in_trial = models.BooleanField(default=False)
    # in_literature = models.BooleanField(default=False)
    links = models.CharField(max_length=16 * 1024, default='')

    def __str__(self):
        return self.drug_id

    def __eq__(self, other):
        """Compare two drugs by ``drug_id``, ``name`` and ``status``."""
        if not isinstance(other, Drug):
            return NotImplemented
        # Compare against other.drug_id; Drug has no uniprot_code attribute.
        return (
            self.drug_id == other.drug_id
            and self.name == other.name
            and self.status == other.status
        )

    def __ne__(self, other):
        """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        # Overriding __eq__ would otherwise set __hash__ to None and make
        # instances unusable in sets and as dict keys. drug_id is part of
        # the equality check, so equal drugs share a hash.
        return hash(self.drug_id)

    def update(self, other):
        """Copy ``drug_id``, ``name``, ``status`` and ``links`` from ``other`` (in memory only)."""
        self.drug_id = other.drug_id
        self.name = other.name
        self.status = other.status
        self.links = other.links
class ProteinDisorderAssociation(models.Model):
pdis_dataset = models.ForeignKey(
......@@ -121,7 +166,6 @@ class ProteinDisorderAssociation(models.Model):
def __str__(self):
return f'{self.pdis_dataset}-{self.protein}-{self.disorder}'
class DrugDisorderIndication(models.Model):
drdi_dataset = models.ForeignKey(
'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation')
......@@ -135,18 +179,6 @@ class DrugDisorderIndication(models.Model):
return f'{self.drdi_dataset}-{self.drug}-{self.disorder}'
class Drug(models.Model):
drug_id = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=256, default='')
status = models.CharField(max_length=128, default='')
# in_trial = models.BooleanField(default=False)
# in_literature = models.BooleanField(default=False)
links = models.CharField(max_length=16*1024, default='')
def __str__(self):
return self.drug_id
class ProteinProteinInteraction(models.Model):
ppi_dataset = models.ForeignKey(
'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation')
......@@ -158,12 +190,12 @@ class ProteinProteinInteraction(models.Model):
from_protein=self.from_protein,
to_protein=self.to_protein,
ppi_dataset=self.ppi_dataset
)
)
p2p1_q = ProteinProteinInteraction.objects.filter(
from_protein=self.to_protein,
to_protein=self.from_protein,
ppi_dataset=self.ppi_dataset
)
)
if p1p2_q.exists() or p2p1_q.exists():
raise ValidationError('Protein-Protein interaction must be unique!')
......@@ -175,7 +207,6 @@ class ProteinProteinInteraction(models.Model):
def __str__(self):
return f'{self.ppi_dataset}-{self.from_protein}-{self.to_protein}'
class ProteinDrugInteraction(models.Model):
pdi_dataset = models.ForeignKey(
'PDIDataset', null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
......@@ -188,7 +219,6 @@ class ProteinDrugInteraction(models.Model):
def __str__(self):
return f'{self.pdi_dataset}-{self.protein}-{self.drug}'
class Task(models.Model):
token = models.CharField(max_length=32, unique=True)
created_at = models.DateTimeField(auto_now_add=True)
......@@ -208,7 +238,6 @@ class Task(models.Model):
result = models.TextField(null=True)
class Network(models.Model):
id = models.CharField(primary_key=True, max_length=32, unique=True)
created_at = models.DateTimeField(auto_now_add=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment