From f29d0c1228236bbaf951c3daaafa86ab48c70d5a Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Thu, 7 Jul 2022 19:20:05 +0200
Subject: [PATCH] Fix Entrez- and Ensembl-based gene mapping

Former-commit-id: 8cac835edcd821aff0621b29a152cd2f4c77f426
---
 drugstone/models.py          |  1 -
 drugstone/util/query_db.py   | 15 +++++++++------
 scripts/docker-entrypoint.sh |  4 ++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drugstone/models.py b/drugstone/models.py
index 4c63095..3cc0e46 100755
--- a/drugstone/models.py
+++ b/drugstone/models.py
@@ -68,7 +68,6 @@ class Protein(models.Model):
     entrez = models.CharField(max_length=15, default='')
     drugs = models.ManyToManyField('Drug', through='ProteinDrugInteraction',
                                    related_name='interacting_drugs')
-    ensembl = models.CharField(max_length=15, default='')
     tissue_expression = models.ManyToManyField('Tissue', through='ExpressionLevel',
                                                related_name='interacting_drugs')
 
diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py
index a03fe1e..62e2e83 100644
--- a/drugstone/util/query_db.py
+++ b/drugstone/util/query_db.py
@@ -1,11 +1,11 @@
-from typing import List, Tuple
+from typing import List, Tuple, Set
 from functools import reduce
 from django.db.models import Q
-from drugstone.models import Protein
+from drugstone.models import Protein, EnsemblGene
 from drugstone.serializers import ProteinSerializer
 
 
-def query_proteins_by_identifier(node_ids: List[str], identifier: str) -> Tuple[List[dict], str]:
+def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[List[dict], str]:
     """Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name
     (identifier).
     The identifier name represents any protein attribute, e.g. uniprot or symbol.
@@ -31,13 +31,16 @@ def query_proteins_by_identifier(node_ids: List[str], identifier: str) -> Tuple[
         q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
     elif identifier == 'ensg':
         protein_attribute = 'ensg'
-        q_list = map(lambda n: Q(ensg__name__iexact=n), node_ids)
-
+        node_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(reduce(lambda a,b: a|b, map(lambda n:Q(name__iexact=n),list(node_ids)))))
+        q_list = map(lambda n: Q(id=n), node_ids)
+    elif identifier == 'entrez':
+        protein_attribute = 'entrez'
+        q_list = map(lambda n: Q(entrez=n), node_ids)
     if not node_ids:
         # node_ids is an empty list
         return [], protein_attribute
-
     q_list = reduce(lambda a, b: a | b, q_list)
+
     node_objects = Protein.objects.filter(q_list)
     # serialize
     nodes = ProteinSerializer(many=True).to_representation(node_objects)
diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
index ac1a58f..db7a2d3 100755
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone
 python3 manage.py migrate
 python3 manage.py createfixtures
 python3 manage.py cleanuptasks
-python3 manage.py populate_db --all
-python3 manage.py make_graphs
+#python3 manage.py populate_db --update -a
+#python3 manage.py make_graphs
 
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
-- 
GitLab