From 276f374f817cc0c16d2065ccada11aaf00f503d2 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Tue, 6 Dec 2022 19:41:48 +0100
Subject: [PATCH] updated backend to convert compact notation; fixed issue that only mapped main symbol nodes

Former-commit-id: b14c40ee04e4fb7100ebb049ec65c7da701e5b8b
---
Review note: the hunks below were hand-edited after export (hunk headers and
diffstat were recomputed); the blob ids on the `index` lines were not.

 drugstone/urls.py          |  3 +-
 drugstone/util/query_db.py | 81 ++++++++++++++++++++++++++++++++++++++
 drugstone/views.py         | 18 +++++++-
 3 files changed, 99 insertions(+), 3 deletions(-)

diff --git a/drugstone/urls.py b/drugstone/urls.py
index 7e0d16a..680f18b 100755
--- a/drugstone/urls.py
+++ b/drugstone/urls.py
@@ -19,12 +19,13 @@ from django.urls import path
 from drugstone.views import map_nodes, tasks_view, result_view, \
     graph_export, TissueView, TissueExpressionView, query_tissue_proteins, TaskView, \
     adjacent_drugs, adjacent_disorders, fetch_edges, create_network, load_network, get_license, get_datasets, \
-    get_max_tissue_expression
+    get_max_tissue_expression, convert_compact_ids
 
 # cache time is 6 hours
 urlpatterns = [
     path('get_datasets/', get_datasets),
     path('map_nodes/', map_nodes),
+    path('convert_compact_node_list/', convert_compact_ids),
     path('fetch_edges/', fetch_edges),
     path('task/', TaskView.as_view()),
     path('tasks/', tasks_view),
diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py
index 39171f5..33584fe 100644
--- a/drugstone/util/query_db.py
+++ b/drugstone/util/query_db.py
@@ -64,6 +64,87 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
     return nodes, protein_attribute
 
 
+def get_protein_ids(id_space, proteins):
+    """Returns the ids of the given id space from serialized proteins.
+
+    Args:
+        id_space (str): One of "uniprot", "ensg", "symbol", "entrez".
+        proteins (list): Serialized protein entries.
+
+    Returns:
+        list: One entry per protein, or an empty list if id_space is unknown.
+    """
+    # maps the user-facing id space name to the key of the serialized protein
+    attribute_by_id_space = {
+        'uniprot': 'uniprot_ac',
+        'ensg': 'ensg',
+        'symbol': 'symbol',
+        'entrez': 'entrez'
+    }
+    attribute = attribute_by_id_space.get(id_space)
+    if attribute is None:
+        return []
+    return [p[attribute] for p in proteins]
+
+
+def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> List[str]:
+    """Resolves node ids in compact notation to plain ids of one identifier space.
+
+    Ids prefixed with "symbol:", "uniprot:", "ensg:" or "entrez:" are looked up in the Protein table and
+    replaced by the ids of the matched proteins in the requested identifier space. Ids without one of these
+    prefixes are returned unchanged.
+
+    Args:
+        node_ids (Set[str]): Node ids, each optionally prefixed with an id space.
+        identifier (str): Target id space, one of "symbol", "ensg", "uniprot", "entrez".
+
+    Returns:
+        List[str]: De-duplicated ids in no particular order. Prefixed ids without a database match are dropped.
+    """
+    if not node_ids:
+        return []
+    id_map = {
+        'symbol:': set(),
+        'uniprot:': set(),
+        'ensg:': set(),
+        'entrez:': set()
+    }
+    clean_ids = set()
+    for node_id in node_ids:
+        for id_space in id_map:
+            if node_id.startswith(id_space):
+                id_map[id_space].add(node_id[len(id_space):].upper())
+                break
+        else:
+            # no known prefix: keep the id as it is
+            clean_ids.add(node_id)
+
+    for id_space, ids in id_map.items():
+        if not ids:
+            continue
+        if id_space == 'symbol:':
+            q_list = map(lambda n: Q(gene__iexact=n), ids)
+        elif id_space == 'uniprot:':
+            q_list = map(lambda n: Q(uniprot_code__iexact=n), ids)
+        elif id_space == 'ensg:':
+            # ensg names are matched on EnsemblGene, whose protein_id is then used to select the proteins
+            ensembls = EnsemblGene.objects.filter(reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), ids)))
+            if not ensembls:
+                continue
+            dr_ids = map(lambda n: n.protein_id, ensembls)
+            q_list = map(lambda n: Q(id=n), dr_ids)
+        elif id_space == 'entrez:':
+            q_list = map(lambda n: Q(entrez=n), ids)
+        else:
+            continue
+        q_list = reduce(lambda a, b: a | b, q_list)
+        proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(q_list))
+        # NOTE(review): set.update needs hashable entries; confirm the serialized fields are plain strings
+        clean_ids.update(get_protein_ids(identifier, proteins))
+
+    return list(clean_ids)
+
+
 def aggregate_nodes(nodes: List[OrderedDict]):
     node = defaultdict(set)
     for n in nodes:
diff --git a/drugstone/views.py b/drugstone/views.py
index d116d11..8195873 100755
--- a/drugstone/views.py
+++ b/drugstone/views.py
@@ -15,7 +15,7 @@ from django.db import IntegrityError
 from rest_framework.decorators import api_view
 from rest_framework.response import Response
 from rest_framework.views import APIView
-from drugstone.util.query_db import query_proteins_by_identifier
+from drugstone.util.query_db import query_proteins_by_identifier, clean_proteins_from_compact_notation
 
 from drugstone.models import *
 from drugstone.serializers import *
@@ -143,6 +143,19 @@ def fetch_edges(request) -> Response:
     return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))
 
 
+@api_view(['POST'])
+def convert_compact_ids(request) -> Response:
+    """Resolves node ids in compact notation (e.g. "symbol:<name>") to plain ids.
+
+    Reads 'nodes' (list of node ids) and 'identifier' (target id space) from the request data and
+    responds with the list of resolved ids.
+    """
+    # the fallback has to be a real list; the string '[]' would be iterated character by character
+    nodes = request.data.get('nodes', [])
+    identifier = request.data.get('identifier', '')
+    cleaned = clean_proteins_from_compact_notation(nodes, identifier)
+    return Response(cleaned)
+
 
 @api_view(['POST'])
 def map_nodes(request) -> Response:
@@ -175,7 +188,8 @@ def map_nodes(request) -> Response:
     nodes_mapped, id_key = query_proteins_by_identifier(node_ids, identifier)
 
     # change data structure to dict in order to be quicker when merging
-    nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
+    # every id of a node gets its own (upper-cased) key, not only the first one
+    nodes_mapped_dict = {node_id.upper(): node for node in nodes_mapped for node_id in node[id_key]}
 
     # merge fetched data with given data to avoid data loss
     for node in nodes:
-- 
GitLab