From f29d0c1228236bbaf951c3daaafa86ab48c70d5a Mon Sep 17 00:00:00 2001 From: AndiMajore <andi.majore@googlemail.com> Date: Thu, 7 Jul 2022 19:20:05 +0200 Subject: [PATCH] fixed entrez and ensembl based gene mapping Former-commit-id: 8cac835edcd821aff0621b29a152cd2f4c77f426 --- drugstone/models.py | 1 - drugstone/util/query_db.py | 15 +++++++++------ scripts/docker-entrypoint.sh | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drugstone/models.py b/drugstone/models.py index 4c63095..3cc0e46 100755 --- a/drugstone/models.py +++ b/drugstone/models.py @@ -68,7 +68,6 @@ class Protein(models.Model): entrez = models.CharField(max_length=15, default='') drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction', related_name='interacting_drugs') - ensembl = models.CharField(max_length=15, default='') tissue_expression = models.ManyToManyField('Tissue', through='ExpressionLevel', related_name='interacting_drugs') diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index a03fe1e..62e2e83 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -1,11 +1,11 @@ -from typing import List, Tuple +from typing import List, Tuple, Set from functools import reduce from django.db.models import Q -from drugstone.models import Protein +from drugstone.models import Protein, EnsemblGene from drugstone.serializers import ProteinSerializer -def query_proteins_by_identifier(node_ids: List[str], identifier: str) -> Tuple[List[dict], str]: +def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[List[dict], str]: """Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name (identifier). The identifier name represents any protein attribute, e.g. uniprot or symbol. @@ -31,13 +31,16 @@ def query_proteins_by_identifier(node_ids: List[str], identifier: str) -> Tuple[ q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) elif identifier == 'ensg': protein_attribute = 'ensg' - q_list = map(lambda n: Q(ensg__name__iexact=n), node_ids) - + node_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(reduce(lambda a,b: a|b, map(lambda n:Q(name__iexact=n),list(node_ids))))) + q_list = map(lambda n: Q(id=n), node_ids) + elif identifier == 'entrez': + protein_attribute = 'entrez' + q_list = map(lambda n: Q(entrez=n), node_ids) if not node_ids: # node_ids is an empty list return [], protein_attribute - q_list = reduce(lambda a, b: a | b, q_list) + node_objects = Protein.objects.filter(q_list) # serialize nodes = ProteinSerializer(many=True).to_representation(node_objects) diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index ac1a58f..db7a2d3 100755 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone python3 manage.py migrate python3 manage.py createfixtures python3 manage.py cleanuptasks -python3 manage.py populate_db --all -python3 manage.py make_graphs +#python3 manage.py populate_db --update -a +#python3 manage.py make_graphs /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" -- GitLab