Skip to content
Snippets Groups Projects
Commit 276f374f authored by AndiMajore's avatar AndiMajore
Browse files

updated backend to convert compact notation; fixed issue that only mapped main symbol nodes

Former-commit-id: b14c40ee
parent 725b0764
No related branches found
No related tags found
No related merge requests found
......@@ -19,12 +19,13 @@ from django.urls import path
from drugstone.views import map_nodes, tasks_view, result_view, \
graph_export, TissueView, TissueExpressionView, query_tissue_proteins, TaskView, \
adjacent_drugs, adjacent_disorders, fetch_edges, create_network, load_network, get_license, get_datasets, \
get_max_tissue_expression
get_max_tissue_expression, convert_compact_ids
# cache time is 6 hours
urlpatterns = [
path('get_datasets/', get_datasets),
path('map_nodes/', map_nodes),
path('convert_compact_node_list/', convert_compact_ids),
path('fetch_edges/', fetch_edges),
path('task/', TaskView.as_view()),
path('tasks/', tasks_view),
......
......@@ -64,6 +64,78 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
return nodes, protein_attribute
def get_protein_ids(id_space, proteins):
    """Extract one identifier attribute from every serialized protein.

    Args:
        id_space (str): Target identifier space; one of "uniprot", "ensg", "symbol" or "entrez".
        proteins: Iterable of mappings (serialized protein entries). Each entry must provide the key
            belonging to the requested id space ("uniprot_ac", "ensg", "symbol" or "entrez").

    Returns:
        list: The selected value of every entry, in input order. An empty list is returned for an
        unknown id space, so the return type is the same on every path.
    """
    # maps the user-facing id space name to the key used in the serialized protein entries
    id_keys = {
        'uniprot': 'uniprot_ac',
        'ensg': 'ensg',
        'symbol': 'symbol',
        'entrez': 'entrez',
    }
    # guard against unhashable values (e.g. a JSON list) before using id_space as a dict key
    key = id_keys.get(id_space) if isinstance(id_space, str) else None
    if key is None:
        return []
    return [p[key] for p in proteins]
def _split_compact_ids(node_ids, prefixes):
    """Partition node ids into plain ids and per-prefix sets of stripped, upper-cased ids.

    Returns a tuple ``(plain, prefixed)``: ``plain`` is the set of ids that start with none of the
    prefixes (kept unchanged), ``prefixed`` maps every prefix to the set of ids that carried it.
    """
    prefixed = {prefix: set() for prefix in prefixes}
    plain = set()
    for node_id in node_ids:
        for prefix in prefixes:
            if node_id.startswith(prefix):
                prefixed[prefix].add(node_id[len(prefix):].upper())
                break
        else:
            # no known prefix: the id is already in plain notation
            plain.add(node_id)
    return plain, prefixed


def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> List[str]:
    """Resolve ids given in compact notation ("<id space>:<id>") into ids of one target id space.

    Ids without a known prefix ("symbol:", "uniprot:", "ensg:", "entrez:"; matched case-sensitively)
    are passed through unchanged. Prefixed ids are stripped of their prefix, upper-cased and looked up
    in the Protein table ("ensg:" ids are resolved through the EnsemblGene table first). Every matched
    protein contributes its id of the target id space, as extracted by get_protein_ids. Prefixed ids
    without a match in the database are dropped.

    Args:
        node_ids: Collection of protein or gene identifiers, optionally in compact notation.
            An empty collection or None yields an empty result.
        identifier: Target id space handed to get_protein_ids, e.g. "symbol", "ensg", "uniprot"
            or "entrez".

    Returns:
        List[str]: De-duplicated ids (pass-through ids plus resolved ids). The order is not defined
        because the ids are collected in a set.
    """
    # also covers None, e.g. a request payload with "nodes": null
    if not node_ids:
        return []

    clean_ids, prefixed_ids = _split_compact_ids(node_ids, ('symbol:', 'uniprot:', 'ensg:', 'entrez:'))

    for id_space, ids in prefixed_ids.items():
        if not ids:
            continue
        if id_space == 'symbol:':
            q_list = [Q(gene__iexact=n) for n in ids]
        elif id_space == 'uniprot:':
            q_list = [Q(uniprot_code__iexact=n) for n in ids]
        elif id_space == 'ensg:':
            # ensembl ids live in their own table; resolve them to protein primary keys first
            ensembl_filter = reduce(lambda a, b: a | b, [Q(name__iexact=n) for n in ids])
            ensembls = EnsemblGene.objects.filter(ensembl_filter)
            q_list = [Q(id=gene.protein_id) for gene in ensembls]
            if not q_list:
                continue
        elif id_space == 'entrez:':
            q_list = [Q(entrez=n) for n in ids]
        else:
            continue
        # OR all single-id conditions into one query per id space
        protein_filter = reduce(lambda a, b: a | b, q_list)
        proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(protein_filter))
        clean_ids.update(get_protein_ids(identifier, proteins))

    return list(clean_ids)
def aggregate_nodes(nodes: List[OrderedDict]):
node = defaultdict(set)
for n in nodes:
......
......@@ -15,7 +15,7 @@ from django.db import IntegrityError
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework.views import APIView
from drugstone.util.query_db import query_proteins_by_identifier
from drugstone.util.query_db import query_proteins_by_identifier, clean_proteins_from_compact_notation
from drugstone.models import *
from drugstone.serializers import *
......@@ -143,6 +143,12 @@ def fetch_edges(request) -> Response:
return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))
@api_view(['POST'])
def convert_compact_ids(request) -> Response:
    """Convert a node id list in compact notation into ids of a single id space.

    Reads "nodes" (list of ids) and "identifier" (target id space) from the POST payload, passes them
    to clean_proteins_from_compact_notation and responds with the resulting id list.
    """
    # The fallback has to be a real empty list: the string '[]' would be iterated character by
    # character and '[' / ']' would be returned as node ids. `or` also covers an explicit null.
    nodes = request.data.get('nodes') or []
    identifier = request.data.get('identifier', '')
    cleaned = clean_proteins_from_compact_notation(nodes, identifier)
    return Response(cleaned)
@api_view(['POST'])
def map_nodes(request) -> Response:
......@@ -175,7 +181,8 @@ def map_nodes(request) -> Response:
nodes_mapped, id_key = query_proteins_by_identifier(node_ids, identifier)
# change data structure to dict in order to be quicker when merging
nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
nodes_mapped_dict = {id.upper(): node for node in nodes_mapped for id in node[id_key]}
print(nodes_mapped_dict)
# merge fetched data with given data to avoid data loss
for node in nodes:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment