diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index a589cf2b551d85cfbdbbfa8e4ff7a604202785f1..8ccd9e88e5b2f281c0dab448d65845cafafe28dc 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -7,6 +7,16 @@ from drugstone.models import Protein, EnsemblGene from drugstone.serializers import ProteinSerializer +MAP_ID_SPACE_COMPACT_TO_DRUGSTONE = { + 'symbol:': 'symbol', + 'uniprot:': 'uniprot', + 'ensg:': 'ensg', + 'ncbigene:': 'entrez', + 'ensembl:': 'ensg', + 'entrez:': 'entrez' +} + + def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[List[dict], str]: """Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name (identifier). @@ -66,13 +76,13 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L def get_protein_ids(id_space, proteins): if (id_space == 'uniprot'): - return [p['uniprot'] for p in proteins] + return {p['uniprot'] for p in proteins} if (id_space == 'ensg' or id_space == 'ensembl'): - return [p['ensg'] for p in proteins] + return {p['ensg'] for p in proteins} if (id_space == 'symbol'): - return [p['symbol'] for p in proteins] + return {p['symbol'] for p in proteins} if (id_space == 'entrez' or id_space == 'ncbigene'): - return [p['entrez'] for p in proteins] + return {p['entrez'] for p in proteins} return set() @@ -136,7 +146,14 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> continue q_list = reduce(lambda a, b: a | b, q_list) proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(q_list)) - clean_ids = clean_ids.union(get_protein_ids(identifier, proteins)) + # if protein could not be mapped + clean_ids_temp = get_protein_ids(identifier, proteins) + if '' in clean_ids_temp: + clean_ids_temp.remove('') + # at least one protein could not be found in id space, use original id as placeholder + ids_placeholder = {p[MAP_ID_SPACE_COMPACT_TO_DRUGSTONE[id_space]] for p in proteins if p[identifier] == ''} + clean_ids_temp |= ids_placeholder + clean_ids |= clean_ids_temp return list(clean_ids) diff --git a/drugstone/views.py b/drugstone/views.py index 89081abd4ba44e55fc269652a7b7c07ea77a7cce..366af9f3335fa34e1ee9f56b84fbd02b7159c323 100755 --- a/drugstone/views.py +++ b/drugstone/views.py @@ -175,11 +175,18 @@ def map_nodes(request) -> Response: """ # load data from request nodes = request.data.get('nodes', '[]') + id_map = {} + nodes_clean = [] for node in nodes: + if not node['id']: + # skip empty node id '' + continue upper = node['id'].upper() id_map[upper] = node['id'] node['id'] = upper + nodes_clean.append(node) + nodes = nodes_clean identifier = request.data.get('identifier', '') # extract ids for filtering