Skip to content
Snippets Groups Projects
Commit 2692374c authored by Hartung, Michael's avatar Hartung, Michael
Browse files

map_nodes: remove empty node ids; compact_ids: fallback to input id if protein...

map_nodes: remove empty node ids; compact_ids: fallback to input id if protein does not have id in target space


Former-commit-id: 712b553579c8857e63d895df715677919b31cec2 [formerly 320590f9cd52ce892d309763aca62ee26b8305b1]
Former-commit-id: b1ab54bc90f5c43527fcf914183c0cbd89350c36
parent 8c3c38ea
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,16 @@ from drugstone.models import Protein, EnsemblGene
from drugstone.serializers import ProteinSerializer
# Lookup from an id-space prefix (trailing colon included) to the key under
# which that identifier is stored on a serialized protein record.
# Aliases share a target: 'ensg:' / 'ensembl:' -> 'ensg' and
# 'ncbigene:' / 'entrez:' -> 'entrez'.
MAP_ID_SPACE_COMPACT_TO_DRUGSTONE = {
    'symbol:': 'symbol',
    'uniprot:': 'uniprot',
    'ensg:': 'ensg',
    'ncbigene:': 'entrez',
    'ensembl:': 'ensg',
    'entrez:': 'entrez',
}
def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[List[dict], str]:
"""Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name
(identifier).
......@@ -66,13 +76,13 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
def get_protein_ids(id_space, proteins):
    """Collect the identifiers of ``proteins`` in the requested id space.

    Args:
        id_space: Name of the identifier space. Accepted values are
            'uniprot', 'ensg' (alias 'ensembl'), 'symbol' and 'entrez'
            (alias 'ncbigene').
        proteins: Iterable of mappings, each providing the key that
            corresponds to ``id_space``.

    Returns:
        A set with the value of the matching key of every protein. Values are
        taken as-is, so an empty string stored on a protein ends up in the
        result. An unrecognised ``id_space`` yields an empty set.

    Raises:
        KeyError: If a protein lacks the key for a recognised ``id_space``.
    """
    # Always return a set: the fallback below is a set, and callers rely on
    # set operations (in-place union) on the result.
    if id_space == 'uniprot':
        return {p['uniprot'] for p in proteins}
    if id_space in ('ensg', 'ensembl'):
        return {p['ensg'] for p in proteins}
    if id_space == 'symbol':
        return {p['symbol'] for p in proteins}
    if id_space in ('entrez', 'ncbigene'):
        return {p['entrez'] for p in proteins}
    return set()
......@@ -136,7 +146,14 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) ->
continue
q_list = reduce(lambda a, b: a | b, q_list)
proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(q_list))
clean_ids = clean_ids.union(get_protein_ids(identifier, proteins))
# if protein could not be mapped
clean_ids_temp = get_protein_ids(identifier, proteins)
if '' in clean_ids_temp:
clean_ids_temp.remove('')
# at least one protein could not be found in id space, use original id as placeholder
ids_placeholder = {p[MAP_ID_SPACE_COMPACT_TO_DRUGSTONE[id_space]] for p in proteins if p[identifier] == ''}
clean_ids_temp |= ids_placeholder
clean_ids |= clean_ids_temp
return list(clean_ids)
......
......@@ -175,11 +175,18 @@ def map_nodes(request) -> Response:
"""
# load data from request
nodes = request.data.get('nodes', '[]')
id_map = {}
nodes_clean = []
for node in nodes:
if not node['id']:
# skip empty node id ''
continue
upper = node['id'].upper()
id_map[upper] = node['id']
node['id'] = upper
nodes_clean.append(node)
nodes = nodes_clean
identifier = request.data.get('identifier', '')
# extract ids for filtering
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment