updated backend to convert compact notation; fixed issue that only mapped main symbol nodes

Former-commit-id: b14c40ee

updated backend to convert compact notation; fixed issue that only mapped main symbol nodes
276f374f · AndiMajore · 725b0764 · 276f374f · 276f374f · 276f374f
Commit 276f374f authored 2 years ago by AndiMajore
--- a/drugstone/urls.py
+++ b/drugstone/urls.py
@@ -19,12 +19,13 @@ from django.urls import path
 from drugstone.views import map_nodes, tasks_view, result_view, \
    graph_export, TissueView, TissueExpressionView, query_tissue_proteins, TaskView, \
    adjacent_drugs, adjacent_disorders, fetch_edges, create_network, load_network, get_license, get_datasets, \
-    get_max_tissue_expression
+    get_max_tissue_expression, convert_compact_ids

 # cache time is 6 hours
 urlpatterns = [
    path('get_datasets/', get_datasets),
    path('map_nodes/', map_nodes),
+    path('convert_compact_node_list/', convert_compact_ids),
    path('fetch_edges/', fetch_edges),
    path('task/', TaskView.as_view()),
    path('tasks/', tasks_view),

--- a/drugstone/util/query_db.py
+++ b/drugstone/util/query_db.py
@@ -64,6 +64,78 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
    return nodes, protein_attribute


+def get_protein_ids(id_space, proteins):
+    """Extract the IDs of one identifier space from serialized protein entries.
+
+    Args:
+        id_space: Name of the identifier space; one of "uniprot", "ensg",
+            "symbol" or "entrez".
+        proteins: Iterable of mappings. Each entry must contain the key that
+            belongs to id_space ("uniprot_ac", "ensg", "symbol" or "entrez").
+
+    Returns:
+        list: One value per entry of proteins, in input order. An empty list
+            is returned when id_space is not one of the known names.
+
+    Raises:
+        KeyError: If id_space is known but an entry lacks the matching key.
+    """
+    key_by_id_space = {
+        'uniprot': 'uniprot_ac',
+        'ensg': 'ensg',
+        'symbol': 'symbol',
+        'entrez': 'entrez',
+    }
+    # Only strings can name an ID space; guarding here keeps unhashable input
+    # (e.g. a list) on the "unknown" path instead of raising TypeError.
+    key = key_by_id_space.get(id_space) if isinstance(id_space, str) else None
+    if key is None:
+        # Unknown ID space: return an empty list so every path yields the
+        # same container type.
+        return []
+    return [protein[key] for protein in proteins]
+
+
+def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> List[str]:
+    """Resolve node IDs given in compact notation into plain IDs of one identifier space.
+
+    An ID in compact notation starts with one of the prefixes "symbol:",
+    "uniprot:", "ensg:" or "entrez:" (matched case-sensitively). The part after
+    the prefix is upper-cased and looked up in the Protein table (through
+    EnsemblGene for "ensg:"); the matching proteins are then translated into the
+    requested identifier space via get_protein_ids. IDs without a known prefix
+    are passed through unchanged. Prefixed IDs that match no protein are dropped.
+
+    Args:
+        node_ids: Node IDs, each optionally carrying one of the prefixes above.
+        identifier: Identifier space of the returned IDs, handed to
+            get_protein_ids (e.g. "symbol", "uniprot", "ensg", "entrez").
+
+    Returns:
+        List[str]: De-duplicated IDs in no particular order; an empty list if
+            node_ids is empty.
+    """
+    if len(node_ids) == 0:
+        return []
+
+    prefixes = ('symbol:', 'uniprot:', 'ensg:', 'entrez:')
+    prefixed = {prefix: set() for prefix in prefixes}
+    resolved = set()
+
+    # Split the input into pass-through IDs and per-prefix lookup values.
+    for node_id in node_ids:
+        prefix = next((p for p in prefixes if node_id.startswith(p)), None)
+        if prefix is None:
+            resolved.add(node_id)
+        else:
+            prefixed[prefix].add(node_id[len(prefix):].upper())
+
+    # Resolve each non-empty prefix group with one OR-combined Protein query.
+    for prefix, values in prefixed.items():
+        if len(values) == 0:
+            continue
+        if prefix == 'ensg:':
+            # Ensembl gene names live in their own table; go through it to
+            # obtain the protein IDs first.
+            name_filter = reduce(lambda a, b: a | b, [Q(name__iexact=value) for value in values])
+            genes = EnsemblGene.objects.filter(name_filter)
+            if len(genes) == 0:
+                continue
+            conditions = [Q(id=gene.protein_id) for gene in genes]
+        elif prefix == 'symbol:':
+            conditions = [Q(gene__iexact=value) for value in values]
+        elif prefix == 'uniprot:':
+            conditions = [Q(uniprot_code__iexact=value) for value in values]
+        else:  # 'entrez:' is the only remaining prefix
+            conditions = [Q(entrez=value) for value in values]
+        protein_filter = reduce(lambda a, b: a | b, conditions)
+        matches = ProteinSerializer(many=True).to_representation(Protein.objects.filter(protein_filter))
+        resolved = resolved.union(get_protein_ids(identifier, matches))
+
+    return list(resolved)
+
+
 def aggregate_nodes(nodes: List[OrderedDict]):
    node = defaultdict(set)
    for n in nodes:

--- a/drugstone/views.py
+++ b/drugstone/views.py
@@ -15,7 +15,7 @@ from django.db import IntegrityError
 from rest_framework.decorators import api_view
 from rest_framework.response import Response
 from rest_framework.views import APIView
-from drugstone.util.query_db import query_proteins_by_identifier
+from drugstone.util.query_db import query_proteins_by_identifier, clean_proteins_from_compact_notation

 from drugstone.models import *
 from drugstone.serializers import *
@@ -143,6 +143,12 @@ def fetch_edges(request) -> Response:

    return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))

+@api_view(['POST'])
+def convert_compact_ids(request) -> Response:
+    """Convert a node list in compact notation into plain IDs.
+
+    Reads "nodes" and "identifier" from the request body, passes them to
+    clean_proteins_from_compact_notation and returns its result as the
+    response payload.
+    """
+    # The fallback must be an empty list: the previous default, the string
+    # '[]', was iterated character by character and produced ['[', ']'].
+    nodes = request.data.get('nodes', [])
+    identifier = request.data.get('identifier', '')
+    cleaned = clean_proteins_from_compact_notation(nodes, identifier)
+    return Response(cleaned)

 @api_view(['POST'])
 def map_nodes(request) -> Response:
@@ -175,7 +181,8 @@ def map_nodes(request) -> Response:
    nodes_mapped, id_key = query_proteins_by_identifier(node_ids, identifier)

    # change data structure to dict in order to be quicker when merging
-    nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
+    nodes_mapped_dict = {id.upper(): node for node in nodes_mapped for id in node[id_key]}
+    print(nodes_mapped_dict)

    # merge fetched data with given data to avoid data loss
    for node in nodes: