Skip to content
Snippets Groups Projects
Commit 9a05b483 authored by AndiMajore's avatar AndiMajore
Browse files

Fixed auto-migration issue; activated NeDRex-based protein, disorder, and drug import

Former-commit-id: 29eb95b6491c6db920c34400d4531bd62877baa1 [formerly 532279253490c6d6e903e43fd77fd1a6977157f2]
Former-commit-id: df106b3875c66c5dd6f07d5ef336f579182b85b7
parent 222231a0
No related branches found
No related tags found
No related merge requests found
Showing
with 80 additions and 485 deletions
......@@ -11,8 +11,8 @@ services:
env_file:
- 'docker-django.env.dev'
restart: always
# volumes:
# - drugstone_backend_volume:/usr/src/drugstone/
volumes:
- drugstone_backend_volume:/usr/src/drugstone/drugstone/migrations
ports:
- 8001:8000
networks:
......@@ -55,7 +55,7 @@ services:
- "sh"
- "/usr/src/drugstone/scripts/start_celery_worker.sh"
restart: always
build: .
image: drugstone_backend
container_name: drugstone_celery
hostname: drugstone_celery
env_file:
......@@ -71,7 +71,7 @@ services:
command:
- "sh"
- "/usr/src/drugstone/scripts/start_celery_beat.sh"
build: .
image: drugstone_backend
container_name: drugstone_celery_beat
hostname: drugstone_celery_beat
env_file:
......
......@@ -2,6 +2,10 @@
file="docker-entrypoint.lock"
# exit if entrypoint.lock exists to prevent new import of data every time docker is restarted
python3 manage.py makemigrations drugstone
python3 manage.py migrate
if ! test -f "$file"; then
python3 manage.py createfixtures
python3 manage.py cleanuptasks
......
migrations
......@@ -45,7 +45,7 @@ def identify_updates(new_list, old_list):
def format_list(l):
    """Render a collection as a comma-separated string without brackets or quotes.

    The elements are joined explicitly instead of slicing the ``repr`` of the
    container, so values that contain apostrophes are kept intact and
    single-element tuples do not leave a trailing comma behind.

    Args:
        l: A sized iterable (for example a list of ICD-10 codes) or ``None``.

    Returns:
        The elements converted with ``str`` and joined by ``", "``, or the
        empty string when ``l`` is ``None`` or has no elements.
    """
    if l is None or len(l) == 0:
        return ""
    return ", ".join(str(item) for item in l)
......@@ -89,21 +89,21 @@ class nedrex_importer:
self.init_proteins()
def add_protein(node):
print(node)
id = node['primaryDomainId'].split('.')[1]
name = node['geneName']
if len(node['synonyms']) > 0:
name = node['synonyms'][0]
idx = name.index('{')
if idx > 0:
name = name[idx - 1:]
proteins[id] = models.Protein(uniprot_code=id, name=name, gene=node['geneName'])
if '{' in name:
idx = name.index('{')
if idx > 0:
name = name[:idx - 1]
proteins[id] = models.Protein(uniprot_code=id, protein_name=name, gene=node['geneName'])
def add_edges(edge):
id = edge['sourceDomainId'].split('.')[1]
protein = proteins[id]
protein.entrez = edge['targetDomainId'].split('.')[1]
gene_to_prots[edge['targetDomainId']].add(id)
gene_to_prots[protein.entrez].add(id)
def add_genes(node):
id = node['primaryDomainId'].split('.')[1]
......@@ -125,7 +125,7 @@ class nedrex_importer:
for protein in creates:
self.proteins[protein.uniprot_code] = protein
else:
models.Protein.objects.bulk_create(self.proteins.values())
models.Protein.objects.bulk_create(proteins.values())
self.proteins = proteins
return len(self.proteins)
......@@ -148,10 +148,9 @@ class nedrex_importer:
for drug in creates:
self.drugs[drug.drug_id] = drug
else:
models.Drug.objects.bulk_create(self.drugs.values())
models.Drug.objects.bulk_create(drugs.values())
self.drugs = drugs
self.drugs = drugs
return len(self.drugs)
def import_disorders(self, update):
......@@ -161,7 +160,7 @@ class nedrex_importer:
def add_disorder(node):
id = node['primaryDomainId'].split('.')[1]
self.disorders[id] = models.Disorder(mondo_id=id, label=node['displayName'], icd10=format_list(node['icd10']))
disorders[id] = models.Disorder(mondo_id=id, label=node['displayName'], icd10=format_list(node['icd10']))
iter_node_collection('disorder', add_disorder)
......@@ -173,8 +172,9 @@ class nedrex_importer:
for disorder in creates:
self.disorders[disorder.uniprot_code] = disorder
else:
models.Disorder.objects.bulk_create(self.disorders.values())
models.Disorder.objects.bulk_create(disorders.values())
self.disorders = disorders
self.disorders = disorders
return len(self.disorders)
......@@ -8,17 +8,18 @@ from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
from drugstone.management.includes.DataPopulator import DataPopulator
from .import_from_nedrex import nedrex_importer
class DatabasePopulator:
def __init__(self, data_dir,
# protein_file,
drug_file,
# drug_file,
# protein_protein_interaction_file,
# protein_drug_interaction_file,
tissue_expression_file,
):
self.data_dir = data_dir
# self.protein_file = protein_file
self.drug_file = drug_file
# self.drug_file = drug_file
# self.ppi_file = protein_protein_interaction_file
# self.pdi_file = protein_drug_interaction_file
self.exp_file = tissue_expression_file
......@@ -63,15 +64,14 @@ class Command(BaseCommand):
# dataset directory
parser.add_argument('-dd', '--data_dir', type=str, help='Dataset directory path')
# parser.add_argument('-p', '--protein_file', type=str, help='Protein file')
parser.add_argument('-dr', '--drug_file', type=str, help='Drug file name')
# parser.add_argument('-ppi', '--ppi_file', type=str, help='Protein-Protein interaction file')
# parser.add_argument('-pdi', '--pdi_file', type=str, help='Protein-Drug interaction file')
parser.add_argument('-exp', '--exp_file', type=str, help='Tissue expression file (.gct without first 2 lines)')
parser.add_argument('-dm', '--delete_model', type=str, help='Delete model(s)')
parser.add_argument('-p', '--proteins', type=str, help='Populate Proteins')
parser.add_argument('-di', '--disorders', type=str, help='Populate Disorders')
parser.add_argument('-p', '--proteins', action='store_true', help='Populate Proteins')
parser.add_argument('-di', '--disorders', action='store_true', help='Populate Disorders')
parser.add_argument('-dr', '--drugs', action='store_true', help='Drug file name')
parser.add_argument('-exp', '--exp_file', type=str, help='Tissue expression file (.gct without first 2 lines)')
parser.add_argument('-pp', '--protein_protein', type=str, help='Populate Protein-Protein Interactions')
parser.add_argument('-pdr', '--protein_drug', type=str, help='Populate Protein-Drug Interactions')
parser.add_argument('-pdi', '--protein_disorder', type=str, help='Populate Protein-Disorder Associations')
......@@ -80,20 +80,16 @@ class Command(BaseCommand):
def handle(self, *args, **kwargs):
data_dir = kwargs['data_dir']
# protein_file = kwargs['protein_file']
drug_file = kwargs['drug_file']
# ppi_file = kwargs['ppi_file']
# pdi_file = kwargs['pdi_file']
exp_file = kwargs['exp_file']
p = kwargs['proteins']
pp = kwargs['protein_protein']
pd = kwargs['protein_drug']
# p = kwargs['proteins']
# pp = kwargs['protein_protein']
# pd = kwargs['protein_drug']
db_populator = DatabasePopulator(data_dir=data_dir,
# protein_file=protein_file,
drug_file=drug_file,
# drug_file=drug_file,
# protein_protein_interaction_file=ppi_file,
# protein_drug_interaction_file=pdi_file,
tissue_expression_file=exp_file,
......@@ -108,41 +104,33 @@ class Command(BaseCommand):
populator = DataPopulator()
if kwargs['drug_file'] is not None:
if kwargs['drugs']:
print('Populating Drugs...')
n = DataPopulator.populate_drugs(populator)
# n = nedrex_importer.import_drugs(importer,False)
# n = DataPopulator.populate_drugs(populator)
n = nedrex_importer.import_drugs(importer,False)
print(f'Populated {n} Drugs.')
# if kwargs['protein_file'] is not None:
# db_poulator.populate_protein_model()
# if kwargs['pdi_file'] is not None:
# db_poulator.populate_pdi_model()
# if kwargs['ppi_file'] is not None:
# db_poulator.populate_ppi_model()
if kwargs['exp_file'] is not None:
print('Populating Expressions...')
n = DataPopulator.populate_expessions(populator)
print(f'Populated {n} Expressions.')
if kwargs['proteins'] is not None:
if kwargs['proteins']:
print('Populating Proteins...')
# n = nedrex_importer.import_proteins(importer, False)
n = DataPopulator.populate_proteins(populator)
n = nedrex_importer.import_proteins(importer, False)
# n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
n = DataPopulator.populate_ensg(populator)
print(f'Populated {n} ENSG IDs.')
# print('Populating ENSG IDs...')
# n = DataPopulator.populate_ensg(populator)
# print(f'Populated {n} ENSG IDs.')
if kwargs['disorders'] is not None:
if kwargs['disorders']:
print('Populating Disorders...')
# n = nedrex_importer.import_disorders(importer, False)
n = DataPopulator.populate_disorders(populator)
n = nedrex_importer.import_disorders(importer, False)
# n = DataPopulator.populate_disorders(populator)
print(f'Populated {n} Disorders.')
if kwargs['protein_protein'] is not None:
......
import python_nedrex as nedrex
from python_nedrex.core import get_nodes, get_edges, get_api_key
def iter_node_collection(coll_name, eval, limit=10000):
    """Page through a node collection and invoke a callback for every node.

    Pages are requested from ``get_nodes`` with a growing offset until an
    empty (falsy) page is returned.

    Args:
        coll_name: Name of the node collection, forwarded to ``get_nodes``.
        eval: Callable invoked once per node. The name shadows the builtin
            ``eval`` inside this function; it is kept so that existing
            keyword callers keep working.
        limit: Number of nodes requested per page. Defaults to 10000, the
            value that used to be hard-coded.
            NOTE(review): the backend may cap the page size - confirm before
            passing larger values.

    Raises:
        ValueError: If ``limit`` is not a positive number, because the offset
            would never advance.
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")
    offset = 0
    while True:
        result = get_nodes(coll_name, offset=offset, limit=limit)
        if not result:
            return
        for node in result:
            eval(node)
        offset += limit
def iter_edge_collection(coll_name, eval, limit=10000):
    """Page through an edge collection and invoke a callback for every edge.

    Pages are requested from ``get_edges`` with a growing offset until an
    empty (falsy) page is returned.

    Args:
        coll_name: Name of the edge collection, forwarded to ``get_edges``.
        eval: Callable invoked once per edge. The name shadows the builtin
            ``eval`` inside this function; it is kept so that existing
            keyword callers keep working.
        limit: Number of edges requested per page. Defaults to 10000, the
            value that used to be hard-coded.
            NOTE(review): the backend may cap the page size - confirm before
            passing larger values.

    Raises:
        ValueError: If ``limit`` is not a positive number, because the offset
            would never advance.
    """
    if limit <= 0:
        raise ValueError("limit must be a positive integer")
    offset = 0
    while True:
        result = get_edges(coll_name, offset=offset, limit=limit)
        if not result:
            return
        for edge in result:
            eval(edge)
        offset += limit
# Address of the NeDRex API instance this script talks to.
base_url = "http://82.148.225.92:8123/"
nedrex.config.set_url_base(base_url)
# Request an API key (accepting the EULA) and register it with the client.
api_key = get_api_key(accept_eula=True)
nedrex.config.set_api_key(api_key)
# Dump every edge of the collection to stdout. `print` is passed directly;
# wrapping it in a lambda that builds a set literal only created a throwaway
# object per edge.
iter_edge_collection("gene_expressed_in_tissue", print)
\ No newline at end of file
# Generated by Django 3.0.4 on 2020-03-27 17:36
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Initial migration of the 'drugstone' app: creates the Edge, Effect and
# ProteinGroup models and links Edge to the other two.
initial = True
# Empty: this migration has no prerequisites.
dependencies = [
]
operations = [
# Edge is created with only id and name; its foreign keys are added at the
# end of this operation list.
migrations.CreateModel(
name='Edge',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
],
),
migrations.CreateModel(
name='Effect',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
],
),
# ProteinGroup relates to Effect many-to-many, using Edge as the through model.
migrations.CreateModel(
name='ProteinGroup',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
('effects', models.ManyToManyField(through='drugstone.Edge', to='drugstone.Effect')),
],
),
# The two sides of the through model; deleting either side cascades to Edge.
migrations.AddField(
model_name='edge',
name='effect',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='drugstone.Effect'),
),
migrations.AddField(
model_name='edge',
name='protein_group',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='drugstone.ProteinGroup'),
),
]
# Generated by Django 3.0.4 on 2020-03-28 13:41
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Adds the Organism and Protein models on top of the initial schema.
dependencies = [
('drugstone', '0001_initial'),
]
operations = [
migrations.CreateModel(
name='Organism',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
],
),
# Protein requires both an Organism and a ProteinGroup at this point
# (non-nullable foreign keys with cascading delete).
migrations.CreateModel(
name='Protein',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('uniprot_code', models.CharField(max_length=10)),
('protein_name', models.CharField(max_length=128)),
('gene_name', models.CharField(max_length=128)),
('organism', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='drugstone.Organism')),
('protein_group', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='drugstone.ProteinGroup')),
],
),
]
# Generated by Django 3.0.4 on 2020-03-29 15:27
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Relaxes Protein's foreign keys: organism and protein_group become nullable
# and are set to NULL (instead of cascading) when the referenced row is deleted.
dependencies = [
('drugstone', '0002_organism_protein'),
]
operations = [
migrations.AlterField(
model_name='protein',
name='organism',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Organism'),
),
migrations.AlterField(
model_name='protein',
name='protein_group',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.ProteinGroup'),
),
]
# Generated by Django 3.0.4 on 2020-03-29 15:49
from django.db import migrations, models
class Migration(migrations.Migration):
# Adds a free-text protein_description column to Protein.
dependencies = [
('drugstone', '0003_auto_20200329_1527'),
]
operations = [
# The default supplies a value for rows that already exist.
migrations.AddField(
model_name='protein',
name='protein_description',
field=models.CharField(default='No Description', max_length=512),
),
]
# Generated by Django 3.0.4 on 2020-03-29 20:24
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Relaxes Edge's foreign keys: effect and protein_group become nullable and
# are set to NULL (instead of cascading) when the referenced row is deleted.
dependencies = [
('drugstone', '0004_protein_protein_description'),
]
operations = [
migrations.AlterField(
model_name='edge',
name='effect',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Effect'),
),
migrations.AlterField(
model_name='edge',
name='protein_group',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.ProteinGroup'),
),
]
# Generated by Django 3.0.4 on 2020-03-29 20:41
from django.db import migrations, models
class Migration(migrations.Migration):
# Shortens Protein's field names (dropping the 'protein_' prefix and the
# '_name' suffix) and adds a numeric group_id to ProteinGroup.
dependencies = [
('drugstone', '0005_auto_20200329_2024'),
]
operations = [
migrations.RenameField(
model_name='protein',
old_name='protein_description',
new_name='description',
),
migrations.RenameField(
model_name='protein',
old_name='gene_name',
new_name='gene',
),
migrations.RenameField(
model_name='protein',
old_name='protein_name',
new_name='name',
),
# -1 is the default given to rows without an explicit group_id.
migrations.AddField(
model_name='proteingroup',
name='group_id',
field=models.IntegerField(default=-1),
),
]
# Generated by Django 3.0.4 on 2020-03-30 13:26
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Introduces the Virus and VirusEffect models and a many-to-many relation
# from Virus to Effect that goes through VirusEffect.
dependencies = [
('drugstone', '0006_auto_20200329_2041'),
]
operations = [
migrations.CreateModel(
name='Virus',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
],
),
# Through model: both foreign keys are nullable and set to NULL on delete.
migrations.CreateModel(
name='VirusEffect',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
('effect', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Effect')),
('virus', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Virus')),
],
),
# Added after both models exist because the relation references VirusEffect.
migrations.AddField(
model_name='virus',
name='effects',
field=models.ManyToManyField(through='drugstone.VirusEffect', to='drugstone.Effect'),
),
]
# Generated by Django 3.0.4 on 2020-03-30 17:48
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Removes the Virus and VirusEffect models added by the previous migration
# and gives Effect a nullable link to Organism instead.
dependencies = [
('drugstone', '0007_auto_20200330_1326'),
]
operations = [
# The foreign keys on VirusEffect are dropped before the models are deleted.
migrations.RemoveField(
model_name='viruseffect',
name='effect',
),
migrations.RemoveField(
model_name='viruseffect',
name='virus',
),
migrations.AddField(
model_name='effect',
name='organism',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Organism'),
),
migrations.DeleteModel(
name='Virus',
),
migrations.DeleteModel(
name='VirusEffect',
),
]
# Generated by Django 3.0.4 on 2020-03-30 19:04
from django.db import migrations, models
class Migration(migrations.Migration):
# Gives Protein.gene an empty-string default.
dependencies = [
('drugstone', '0008_auto_20200330_1748'),
]
operations = [
migrations.AlterField(
model_name='protein',
name='gene',
field=models.CharField(default='', max_length=128),
),
]
# Generated by Django 3.0.4 on 2020-03-31 07:09
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Marks Effect.organism as blank=True in addition to being nullable.
dependencies = [
('drugstone', '0009_auto_20200330_1904'),
]
operations = [
migrations.AlterField(
model_name='effect',
name='organism',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Organism'),
),
]
# Generated by Django 3.0.4 on 2020-04-01 10:31
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
# Restructures the schema: Organism and ProteinGroup are removed, Edge now
# links Effect directly to Protein, and Effect gains a link to the new
# DatasetVirus model.
dependencies = [
('drugstone', '0010_auto_20200331_0909'),
]
operations = [
# Each (dataset, virus) combination may be stored only once.
migrations.CreateModel(
name='DatasetVirus',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('dataset', models.CharField(max_length=128)),
('virus', models.CharField(max_length=128)),
],
options={
'unique_together': {('dataset', 'virus')},
},
),
# Drop every field that references Organism or ProteinGroup before the
# models themselves are deleted further down.
migrations.RemoveField(
model_name='proteingroup',
name='effects',
),
migrations.RemoveField(
model_name='edge',
name='protein_group',
),
migrations.RemoveField(
model_name='effect',
name='organism',
),
migrations.RemoveField(
model_name='protein',
name='organism',
),
migrations.RemoveField(
model_name='protein',
name='protein_group',
),
# Edge becomes the through model between Protein and Effect.
migrations.AddField(
model_name='edge',
name='protein',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.Protein'),
),
migrations.AddField(
model_name='protein',
name='effects',
field=models.ManyToManyField(through='drugstone.Edge', to='drugstone.Effect'),
),
migrations.DeleteModel(
name='Organism',
),
migrations.DeleteModel(
name='ProteinGroup',
),
migrations.AddField(
model_name='effect',
name='dataset_virus',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='drugstone.DatasetVirus'),
),
]
# Generated by Django 3.0.4 on 2020-04-01 10:50
from django.db import migrations
class Migration(migrations.Migration):
# Drops the description and name fields from Protein.
dependencies = [
('drugstone', '0011_auto_20200401_1031'),
]
operations = [
migrations.RemoveField(
model_name='protein',
name='description',
),
migrations.RemoveField(
model_name='protein',
name='name',
),
]
# Generated by Django 3.0.4 on 2020-04-01 11:24
from django.db import migrations, models
class Migration(migrations.Migration):
# Adds description and name to Protein, each with a placeholder default, and
# sets placeholder defaults on gene and uniprot_code as well.
dependencies = [
('drugstone', '0012_auto_20200401_1050'),
]
operations = [
migrations.AddField(
model_name='protein',
name='description',
field=models.CharField(default='No description', max_length=512),
),
migrations.AddField(
model_name='protein',
name='name',
field=models.CharField(default='No name', max_length=128),
),
migrations.AlterField(
model_name='protein',
name='gene',
field=models.CharField(default='No gene', max_length=128),
),
migrations.AlterField(
model_name='protein',
name='uniprot_code',
field=models.CharField(default='No code', max_length=10),
),
]
# Generated by Django 3.0.4 on 2020-04-01 13:39
from django.db import migrations, models
class Migration(migrations.Migration):
# Adds Effect.effect_id and sets placeholder defaults on Effect.name and
# Protein.uniprot_code.
dependencies = [
('drugstone', '0013_auto_20200401_1124'),
]
operations = [
migrations.AddField(
model_name='effect',
name='effect_id',
field=models.CharField(default='No id', max_length=128),
),
migrations.AlterField(
model_name='effect',
name='name',
field=models.CharField(default='No name', max_length=128),
),
# NOTE(review): the default 'No accession number' is 19 characters long but
# max_length is 10, so the default itself does not fit the declared length.
migrations.AlterField(
model_name='protein',
name='uniprot_code',
field=models.CharField(default='No accession number', max_length=10),
),
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment