Skip to content
Snippets Groups Projects
Commit db404c9a authored by AndiMajore's avatar AndiMajore
Browse files

removed module file

parent a77c8913
No related branches found
No related tags found
No related merge requests found
[submodule "python_nedrex"]
path = python_nedrex
url = git@github.com:repotrial/python_nedrex.git
......@@ -12,7 +12,7 @@ services:
- 'docker-django.env.dev'
restart: always
# volumes:
# - ./:/usr/src/drugstone/
# - drugstone_backend_volume:/usr/src/drugstone/
ports:
- 8001:8000
networks:
......@@ -97,4 +97,5 @@ networks:
drugstone_net:
volumes:
drugstone_postgres_volume:
\ No newline at end of file
drugstone_postgres_volume:
drugstone_backend_volume:
\ No newline at end of file
from collections import defaultdict
import python_nedrex as nedrex
from python_nedrex.core import get_nodes, get_edges, get_api_key
from drugstone import models
def iter_node_collection(coll_name, eval):
    """Feed every node of a NeDRex collection to a callback, page by page.

    Pages of 10000 nodes are requested through ``get_nodes`` until a request
    returns an empty (falsy) result.

    :param coll_name: name of the node collection to read.
    :param eval: callable invoked once per node.
    """
    page_size = 10000
    start = 0
    batch = get_nodes(coll_name, offset=start, limit=page_size)
    while batch:
        for entry in batch:
            eval(entry)
        start += page_size
        batch = get_nodes(coll_name, offset=start, limit=page_size)
def iter_edge_collection(coll_name, eval):
    """Feed every edge of a NeDRex collection to a callback, page by page.

    Pages of 10000 edges are requested through ``get_edges`` until a request
    returns an empty (falsy) result.

    :param coll_name: name of the edge collection to read.
    :param eval: callable invoked once per edge.
    """
    page_size = 10000
    start = 0
    batch = get_edges(coll_name, offset=start, limit=page_size)
    while batch:
        for entry in batch:
            eval(entry)
        start += page_size
        batch = get_edges(coll_name, offset=start, limit=page_size)
def identify_updates(new_list, old_list):
    """Split freshly fetched entries into updates and creations.

    Both arguments map an identifier to an entry object. Entries must
    support ``!=`` and offer an ``update(other)`` method that copies the
    state of ``other`` into the receiver.

    :param new_list: mapping of identifier -> freshly fetched entry.
    :param old_list: mapping of identifier -> already known entry; entries
        that differ from their new counterpart are updated in place.
    :return: tuple ``(updates, creates)``. ``updates`` holds the refreshed
        entries taken from ``old_list``; ``creates`` holds the entries of
        ``new_list`` whose identifier is missing from ``old_list``.
    """
    updates = []
    creates = []
    for key, new_entry in new_list.items():
        if key not in old_list:
            # Collect the entry itself, not its key: the callers hand this
            # list to bulk_create() and read attributes from its elements.
            creates.append(new_entry)
            continue
        old_entry = old_list[key]
        if new_entry != old_entry:
            old_entry.update(new_entry)
            updates.append(old_entry)
    return updates, creates
class nedrex_importer:
def format_list(l):
    """Render a list as its ``str()`` form without the enclosing brackets.

    :param l: list to render; may be ``None``.
    :return: ``str(l)`` without its first and last character, or an empty
        string when ``l`` is ``None`` or empty.
    """
    if l is None or len(l) == 0:
        return ""
    return str(l)[1:-1]
class nedrex_importer:
proteins = dict()
entrez_to_uniprot = dict()
gene_name_to_uniprot = defaultdict(lambda: set())
disorders = dict()
drugs = dict()
def __init__(self, base_url):
    """Point the NeDRex client at ``base_url`` and register an API key.

    :param base_url: base URL handed to ``nedrex.config.set_url_base``.
    """
    nedrex.config.set_url_base(base_url)
    # The key is requested with accept_eula=True and stored in the client
    # configuration.
    nedrex.config.set_api_key(get_api_key(accept_eula=True))
def init_proteins(self):
    """Fill the protein caches from the database if they are still empty.

    Every ``models.Protein`` row is recorded in ``self.proteins`` (keyed by
    UniProt code), ``self.entrez_to_uniprot`` and
    ``self.gene_name_to_uniprot``. Does nothing when ``self.proteins``
    already holds entries.
    """
    if self.proteins:
        return
    print("Generating protein maps...")
    for protein in models.Protein.objects.all():
        # Key by UniProt code: import_proteins() keys its fetched map and
        # its cache insertions by UniProt code, so an Entrez-keyed cache
        # would never match during update detection.
        self.proteins[protein.uniprot_code] = protein
        self.entrez_to_uniprot[protein.entrez] = protein.uniprot_code
        self.gene_name_to_uniprot[protein.gene].add(protein.uniprot_code)
def init_drugs(self):
    """Fill ``self.drugs`` from the database unless it already has entries.

    Every ``models.Drug`` row is stored under its ``drug_id``.
    """
    if len(self.drugs) != 0:
        return
    print("Generating drug map...")
    for entry in models.Drug.objects.all():
        self.drugs[entry.drug_id] = entry
def import_proteins(self):
import python_nedrex as nedrex
from python_nedrex.core import get_nodes, get_api_key, get_edges
from drugstone.models import Protein
def init_disorders(self):
    """Fill ``self.disorders`` from the database unless it already has entries.

    Every ``models.Disorder`` row is stored under its ``mondo_id``.
    """
    if len(self.disorders) != 0:
        return
    print("Generating disorder map...")
    for entry in models.Disorder.objects.all():
        self.disorders[entry.mondo_id] = entry
# Fetches protein nodes, protein-gene edges and gene nodes from NeDRex, builds
# Protein model objects from them, writes them to the database and returns the
# number of proteins held in self.proteins afterwards.
# NOTE(review): this span was captured from a rendered diff without +/- markers,
# so superseded and current statements appear side by side. The notes below
# mark statements that look superseded -- confirm against the repository.
def import_proteins(self, update: bool):
# Freshly fetched proteins, keyed by the part of primaryDomainId after the first '.'.
proteins = dict()
# Gene identifier -> set of protein ids recorded for that gene in add_edges.
gene_to_prots = defaultdict(lambda: set())
# NOTE(review): the two nested helpers below repeat the module-level functions
# of the same names statement for statement -- presumably the removed side of the diff.
def iter_node_collection(coll_name, eval):
offset = 0
limit = 10000
while True:
result = get_nodes(coll_name, offset=offset, limit=limit)
if not result:
return
for node in result:
eval(node)
offset += limit
def iter_edge_collection(coll_name, eval):
offset = 0
limit = 10000
while True:
result = get_edges(coll_name, offset=offset, limit=limit)
if not result:
return
for edge in result:
eval(edge)
offset += limit
# In update mode the stored proteins are loaded first so they can be compared below.
if update:
self.init_proteins()
# Callback for each 'protein' node.
def add_protein(node):
# NOTE(review): the next three statements write to self.proteins through the
# unqualified name Protein and print the node; they look like the superseded
# version of the statements that follow them.
id = node['primaryDomainId']
self.proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName'])
print(node)
id = node['primaryDomainId'].split('.')[1]
name = node['geneName']
if len(node['synonyms']) > 0:
name = node['synonyms'][0]
# NOTE(review): str.index raises ValueError when '{' is absent -- confirm that
# every first synonym contains it.
idx = name.index('{')
if idx > 0:
name = name[idx - 1:]
proteins[id] = models.Protein(uniprot_code=id, name=name, gene=node['geneName'])
# Callback for each 'protein_encoded_by_gene' edge: stores the gene id on the
# source protein and remembers the protein under its gene.
def add_edges(edge):
# NOTE(review): the next two statements look superseded by the two after them.
id = edge['sourceDomainId']
protein = self.proteins[id]
id = edge['sourceDomainId'].split('.')[1]
protein = proteins[id]
protein.entrez = edge['targetDomainId'].split('.')[1]
# NOTE(review): the key here is the full targetDomainId, while add_genes ends up
# looking up the part after '.' -- confirm that the two agree.
gene_to_prots[edge['targetDomainId']].add(id)
# Callback for each 'gene' node: copies the first synonym onto every protein
# recorded for that gene.
def add_genes(node):
# NOTE(review): the first assignment is overwritten immediately -- presumably superseded.
id = node['primaryDomainId']
id = node['primaryDomainId'].split('.')[1]
for prot_id in gene_to_prots[id]:
# NOTE(review): the first lookup is overwritten immediately -- presumably superseded.
protein = self.proteins[prot_id]
protein = proteins[prot_id]
try:
protein.protein_name = node['synonyms'][0]
# Any failure while reading the first synonym is ignored; the protein keeps its name.
except:
pass
# NOTE(review): repeats the client setup done in __init__ with a hard-coded URL --
# presumably superseded.
nedrex.config.set_url_base("http://82.148.225.92:8123/")
api_key = get_api_key(accept_eula=True)
nedrex.config.set_api_key(api_key)
print('Importing Proteins')
iter_node_collection('protein', add_protein)
print('Importing Protein-Gene mapping')
iter_edge_collection('protein_encoded_by_gene', add_edges)
print('Mapping Gene information')
iter_node_collection('gene', add_genes)
# NOTE(review): unqualified Protein again -- presumably superseded by the if/else below.
Protein.objects.bulk_create(self.proteins.values())
# TODO test updating ideas
if update:
(updates, creates) = identify_updates(proteins, self.proteins)
# NOTE(review): Django's bulk_update() takes a `fields` argument -- confirm this call.
models.Protein.objects.bulk_update(updates)
models.Protein.objects.bulk_create(creates)
for protein in creates:
self.proteins[protein.uniprot_code] = protein
else:
# NOTE(review): this persists self.proteins, not the freshly fetched `proteins`,
# before the cache is replaced -- confirm that this is intended.
models.Protein.objects.bulk_create(self.proteins.values())
self.proteins = proteins
return len(self.proteins)
def import_drugs(self, update):
    """Import all drugs from NeDRex and persist them.

    :param update: when true, compare the fetched drugs with the drugs
        already stored and write only the differences; when false, insert
        every fetched drug.
    :return: number of drugs held in ``self.drugs`` afterwards.
    """
    drugs = dict()

    if update:
        self.init_drugs()

    def add_drug(node):
        # Only the part of primaryDomainId after the first '.' is kept.
        drug_id = node['primaryDomainId'].split('.')[1]
        drugs[drug_id] = models.Drug(drug_id=drug_id, name=node['displayName'],
                                     status=format_list(node['drugGroups']))

    iter_node_collection('drug', add_drug)

    # TODO test updating ideas
    if update:
        (updates, creates) = identify_updates(drugs, self.drugs)
        # bulk_update() must be told which columns to write; these are the
        # non-key attributes that Drug.update() copies.
        models.Drug.objects.bulk_update(updates, ['name', 'status', 'links'])
        models.Drug.objects.bulk_create(creates)
        for drug in creates:
            self.drugs[drug.drug_id] = drug
    else:
        # Persist the freshly fetched drugs: self.drugs is not filled on
        # this path, so creating from it would insert nothing.
        models.Drug.objects.bulk_create(drugs.values())
        self.drugs = drugs
    return len(self.drugs)
def import_disorders(self, update):
    """Import all disorders from NeDRex and persist them.

    :param update: when true, compare the fetched disorders with the
        disorders already stored and write only the differences; when
        false, insert every fetched disorder.
    :return: number of disorders held in ``self.disorders`` afterwards.
    """
    disorders = dict()

    if update:
        self.init_disorders()

    def add_disorder(node):
        # Only the part of primaryDomainId after the first '.' is kept.
        mondo_id = node['primaryDomainId'].split('.')[1]
        # Collect into the local map (not the cache) so the fetched
        # disorders can be compared with, or replace, the cached ones.
        disorders[mondo_id] = models.Disorder(mondo_id=mondo_id, label=node['displayName'],
                                              icd10=format_list(node['icd10']))

    iter_node_collection('disorder', add_disorder)

    # TODO test updating ideas
    if update:
        (updates, creates) = identify_updates(disorders, self.disorders)
        # bulk_update() must be told which columns to write; these are the
        # non-key attributes that Disorder.update() copies.
        models.Disorder.objects.bulk_update(updates, ['label', 'icd10'])
        models.Disorder.objects.bulk_create(creates)
        for disorder in creates:
            # Disorders are cached under their mondo_id (see init_disorders).
            self.disorders[disorder.mondo_id] = disorder
    else:
        models.Disorder.objects.bulk_create(disorders.values())
        self.disorders = disorders
    return len(self.disorders)
......@@ -99,7 +99,7 @@ class Command(BaseCommand):
tissue_expression_file=exp_file,
)
importer = nedrex_importer()
importer = nedrex_importer("http://82.148.225.92:8123/")
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
......@@ -111,6 +111,7 @@ class Command(BaseCommand):
if kwargs['drug_file'] is not None:
print('Populating Drugs...')
n = DataPopulator.populate_drugs(populator)
# n = nedrex_importer.import_drugs(importer,False)
print(f'Populated {n} Drugs.')
# if kwargs['protein_file'] is not None:
......@@ -130,8 +131,8 @@ class Command(BaseCommand):
if kwargs['proteins'] is not None:
print('Populating Proteins...')
n = nedrex_importer.import_proteins(nedrex_importer)
# n = DataPopulator.populate_proteins(populator)
# n = nedrex_importer.import_proteins(importer, False)
n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
......@@ -140,6 +141,7 @@ class Command(BaseCommand):
if kwargs['disorders'] is not None:
print('Populating Disorders...')
# n = nedrex_importer.import_disorders(importer, False)
n = DataPopulator.populate_disorders(populator)
print(f'Populated {n} Disorders.')
......
......@@ -8,7 +8,6 @@ class DataPopulator:
proteins = dict()
uniprot_to_ensembl = dict()
gene_name_to_ensembl = defaultdict(lambda: set())
# protein_name_to_ensembl = dict()
disorders = dict()
drugs = dict()
......@@ -19,7 +18,6 @@ class DataPopulator:
self.proteins[protein.entrez]=protein
self.uniprot_to_ensembl[protein.uniprot_code] = protein.entrez
self.gene_name_to_ensembl[protein.gene].add(protein.entrez)
# self.protein_name_to_ensembl[protein.protein_name] = protein.entrez
def init_drugs(self):
if len(self.drugs)== 0:
......@@ -49,7 +47,6 @@ class DataPopulator:
protein_name=row['protein_name'])
self.uniprot_to_ensembl[row['protein_ac']] = row['entrez_id']
self.gene_name_to_ensembl[row['gene_name']].add(row['entrez_id'])
# self.protein_name_to_ensembl[row['protein_name']] = row['entrez_id']
models.Protein.objects.bulk_create(self.proteins.values())
return len(self.proteins)
......
......@@ -79,7 +79,7 @@ class Protein(models.Model):
# are either 6 or 10 characters long
uniprot_code = models.CharField(max_length=10)
gene = models.CharField(max_length=128, default='') # symbol
gene = models.CharField(max_length=128, default='') # symbol
protein_name = models.CharField(max_length=128, default='')
entrez = models.CharField(max_length=128, default='')
drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction',
......@@ -93,10 +93,21 @@ class Protein(models.Model):
def __str__(self):
return self.gene
def __eq__(self, other):
    """Compare two proteins by UniProt code, gene, protein name and Entrez id.

    :param other: object to compare with.
    :return: ``True``/``False`` for another ``Protein``; ``NotImplemented``
        for any other object, so Python falls back to its default handling
        and no AttributeError is raised.
    """
    if not isinstance(other, Protein):
        return NotImplemented
    return (self.uniprot_code, self.gene, self.protein_name, self.entrez) == (
        other.uniprot_code, other.gene, other.protein_name, other.entrez)

def __ne__(self, other):
    """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        return outcome
    return not outcome

def __hash__(self):
    """Return the hash inherited from the parent model class.

    A class that defines ``__eq__`` without ``__hash__`` becomes unhashable;
    delegating to the parent keeps instances usable in sets and as dict keys.
    """
    return super().__hash__()
def update(self, other):
    """Copy ``uniprot_code``, ``gene``, ``protein_name`` and ``entrez`` from ``other``.

    :param other: object providing the four attributes.
    """
    for field in ('uniprot_code', 'gene', 'protein_name', 'entrez'):
        setattr(self, field, getattr(other, field))
class Disorder(models.Model):
mondo_id = models.CharField(max_length=7)
label = models.CharField(max_length=256, default='') # symbol
label = models.CharField(max_length=256, default='') # symbol
icd10 = models.CharField(max_length=128, default='')
proteins = models.ManyToManyField(
'Protein', through='ProteinDisorderAssociation', related_name='associated_proteins')
......@@ -107,6 +118,40 @@ class Disorder(models.Model):
def __str__(self):
return self.label
def __eq__(self, other):
    """Compare two disorders by MONDO id, label and ICD-10 string.

    :param other: object to compare with.
    :return: ``True``/``False`` for another ``Disorder``; ``NotImplemented``
        for any other object, so Python falls back to its default handling
        and no AttributeError is raised.
    """
    if not isinstance(other, Disorder):
        return NotImplemented
    return (self.mondo_id, self.label, self.icd10) == (
        other.mondo_id, other.label, other.icd10)

def __ne__(self, other):
    """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
    outcome = self.__eq__(other)
    if outcome is NotImplemented:
        return outcome
    return not outcome

def __hash__(self):
    """Return the hash inherited from the parent model class.

    A class that defines ``__eq__`` without ``__hash__`` becomes unhashable;
    delegating to the parent keeps instances usable in sets and as dict keys.
    """
    return super().__hash__()
def update(self, other):
    """Copy ``mondo_id``, ``label`` and ``icd10`` from ``other``.

    :param other: object providing the three attributes.
    """
    for field in ('mondo_id', 'label', 'icd10'):
        setattr(self, field, getattr(other, field))
class Drug(models.Model):
    """Drug entry identified by its unique ``drug_id``."""
    drug_id = models.CharField(max_length=10, unique=True)
    name = models.CharField(max_length=256, default='')
    status = models.CharField(max_length=128, default='')
    # in_trial = models.BooleanField(default=False)
    # in_literature = models.BooleanField(default=False)
    links = models.CharField(max_length=16 * 1024, default='')

    def __str__(self):
        return self.drug_id

    def __eq__(self, other):
        """Compare two drugs by drug id, name and status.

        :param other: object to compare with.
        :return: ``True``/``False`` for another ``Drug``; ``NotImplemented``
            for any other object, so Python falls back to its default
            handling and no AttributeError is raised.
        """
        if not isinstance(other, Drug):
            return NotImplemented
        # Compare against the other drug's drug_id; a Drug has no
        # uniprot_code attribute.
        return (self.drug_id, self.name, self.status) == (
            other.drug_id, other.name, other.status)

    def __ne__(self, other):
        """Return the negation of ``__eq__``, passing ``NotImplemented`` through."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        """Return the hash inherited from ``models.Model``.

        A class that defines ``__eq__`` without ``__hash__`` becomes
        unhashable; delegating to the parent keeps instances usable in sets
        and as dict keys.
        """
        return super().__hash__()

    def update(self, other):
        """Copy ``drug_id``, ``name``, ``status`` and ``links`` from ``other``."""
        self.drug_id = other.drug_id
        self.name = other.name
        self.status = other.status
        self.links = other.links
class ProteinDisorderAssociation(models.Model):
pdis_dataset = models.ForeignKey(
......@@ -121,7 +166,6 @@ class ProteinDisorderAssociation(models.Model):
def __str__(self):
return f'{self.pdis_dataset}-{self.protein}-{self.disorder}'
class DrugDisorderIndication(models.Model):
drdi_dataset = models.ForeignKey(
'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation')
......@@ -135,18 +179,6 @@ class DrugDisorderIndication(models.Model):
return f'{self.drdi_dataset}-{self.drug}-{self.disorder}'
# Drug model: a unique drug_id plus name, status and links text columns.
# NOTE(review): a class named Drug is also defined earlier in this listing;
# presumably this copy is the removed side of the diff -- confirm before
# relying on either definition.
class Drug(models.Model):
# Identifier of the drug; declared unique.
drug_id = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=256, default='')
status = models.CharField(max_length=128, default='')
# in_trial = models.BooleanField(default=False)
# in_literature = models.BooleanField(default=False)
links = models.CharField(max_length=16*1024, default='')
# The string form of a drug is its drug_id.
def __str__(self):
return self.drug_id
class ProteinProteinInteraction(models.Model):
ppi_dataset = models.ForeignKey(
'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation')
......@@ -158,12 +190,12 @@ class ProteinProteinInteraction(models.Model):
from_protein=self.from_protein,
to_protein=self.to_protein,
ppi_dataset=self.ppi_dataset
)
)
p2p1_q = ProteinProteinInteraction.objects.filter(
from_protein=self.to_protein,
to_protein=self.from_protein,
ppi_dataset=self.ppi_dataset
)
)
if p1p2_q.exists() or p2p1_q.exists():
raise ValidationError('Protein-Protein interaction must be unique!')
......@@ -175,7 +207,6 @@ class ProteinProteinInteraction(models.Model):
def __str__(self):
return f'{self.ppi_dataset}-{self.from_protein}-{self.to_protein}'
class ProteinDrugInteraction(models.Model):
pdi_dataset = models.ForeignKey(
'PDIDataset', null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
......@@ -188,7 +219,6 @@ class ProteinDrugInteraction(models.Model):
def __str__(self):
return f'{self.pdi_dataset}-{self.protein}-{self.drug}'
class Task(models.Model):
token = models.CharField(max_length=32, unique=True)
created_at = models.DateTimeField(auto_now_add=True)
......@@ -208,7 +238,6 @@ class Task(models.Model):
result = models.TextField(null=True)
class Network(models.Model):
id = models.CharField(primary_key=True, max_length=32, unique=True)
created_at = models.DateTimeField(auto_now_add=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment