From dc5998e56ede26437654ff2b8b964ce6f391b0e5 Mon Sep 17 00:00:00 2001 From: AndiMajore <andi.majore@googlemail.com> Date: Fri, 29 Jul 2022 11:55:07 +0200 Subject: [PATCH] fixed ensg mode; fixed auto edges on results; fixed issue with dataset pairs on network generation --- drugstone/management/commands/make_graphs.py | 16 ++++++----- drugstone/util/query_db.py | 2 ++ drugstone/views.py | 30 ++++---------------- 3 files changed, 16 insertions(+), 32 deletions(-) diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py index 8794f48..5a1f6ec 100755 --- a/drugstone/management/commands/make_graphs.py +++ b/drugstone/management/commands/make_graphs.py @@ -242,18 +242,20 @@ class Command(BaseCommand): parameter_combinations = [] for protein_interaction_dataset in ppi_datasets: for pdi_dataset in pdi_datasets: - licenced = protein_interaction_dataset.licenced or pdi_dataset.licenced + ppi_ds = protein_interaction_dataset + pdi_ds = pdi_dataset + licenced = ppi_ds.licenced or pdi_ds.licenced if licenced: - protein_interaction_dataset = licenced_ppi_dataset[ - protein_interaction_dataset.name] if protein_interaction_dataset.name in licenced_ppi_dataset else protein_interaction_dataset - pdi_dataset = licenced_pdi_dataset[ - pdi_dataset.name] if pdi_dataset.name in licenced_pdi_dataset else pdi_dataset - hash = f'{protein_interaction_dataset.name}-{pdi_dataset.name}_{licenced}' + ppi_ds = licenced_ppi_dataset[ + ppi_ds.name] if protein_interaction_dataset.name in licenced_ppi_dataset else ppi_ds + pdi_ds = licenced_pdi_dataset[ + pdi_ds.name] if pdi_ds.name in licenced_pdi_dataset else pdi_ds + hash = f'{ppi_ds.name}-{pdi_ds.name}_{licenced}' if hash in uniq_combis: continue uniq_combis.add(hash) for identifier in ['ensg', 'symbol', 'ensembl', 'uniprot']: - parameter_combinations.append([protein_interaction_dataset, pdi_dataset, identifier]) + parameter_combinations.append([ppi_ds, pdi_ds, identifier]) # close all database connections so subprocesses will create their own connections # this prevents the processes from running into problems because of using the same connection db.connections.close_all() diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index ba26adb..0afd48d 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -24,6 +24,8 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L Returns name of backend attribute of Protein table """ # query protein table + if(len(node_ids) == 0): + return list(), identifier if identifier == 'symbol': protein_attribute = 'symbol' q_list = map(lambda n: Q(gene__iexact=n), node_ids) diff --git a/drugstone/views.py b/drugstone/views.py index b5e8183..a60a38c 100755 --- a/drugstone/views.py +++ b/drugstone/views.py @@ -377,27 +377,13 @@ def result_view(request) -> Response: nodes_mapped, id_key = query_proteins_by_identifier(edge_endpoint_ids, identifier) - # change data structure to dict in order to be quicker when merging - print(nodes_mapped) - # nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped} - # nodes_mapped_dict_reverse = {} - # for id, node in nodes_mapped_dict.items(): - # for drugstone_id in node[node_name_attribute]: - # nodes_mapped_dict_reverse[node[drugstone_id]] = id - - print(nodes) - # for edge in edges: - # # change edge endpoints if they were matched with a protein in the database - # edge['from'] = nodes_mapped_dict[edge['from']][node_name_attribute] if edge['from'] in nodes_mapped_dict else \ - # edge['from'] - # edge['to'] = nodes_mapped_dict[edge['to']][node_name_attribute] if edge['to'] in nodes_mapped_dict else edge[ - # 'to'] + if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']: + prots = list(filter(lambda n: n['drugstone_type'] == 'protein', + filter(lambda n: 'drugstone_type' in n and node_name_attribute in n, parameters['input_network']['nodes']))) - if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']: - proteins = {node_name[1:] for nodes in map(lambda n: 'drugstoneType' in n and n[node_name_attribute], - filter(lambda n: n.drugstoneType == 'protein' ,filter(lambda n: 'drugstoneType' in n and node_name_attribute in n ,parameters['input_network']['nodes']))) for node_name in nodes} + proteins = {node_name[1:] for node in prots for node_name in node[node_name_attribute]} dataset = DEFAULTS['ppi'] if 'interaction_protein_protein' not in parameters['config'] else \ parameters['config'][ 'interaction_protein_protein'] @@ -407,18 +393,15 @@ def result_view(request) -> Response: auto_edges = list(map(lambda n: {"from": f'p{n.from_protein_id}', "to": f'p{n.to_protein_id}'}, interaction_objects)) edges.extend(auto_edges) - # TODO check what to do with edges with from and to id lists - result['network']['edges'].extend(edges) uniq_edges = dict() for edge in result['network']['edges']: - print(edge) hash = edge['from'] + edge['to'] uniq_edges[hash] = edge result['network']['edges'] = list(uniq_edges.values()) - # result['network']['nodes'] = list(identifier_nodes) + if 'scores' in result['node_attributes']: del result['node_attributes']['scores'] @@ -671,9 +654,6 @@ class TissueExpressionView(APIView): for node in nodes + seeds: node_type = node_types.get(node) details = None - # if not node_type: - # print('we should not see this 3') - # node_type, details = infer_node_type_and_details(node) if node_type == 'protein': if details: proteins.append(details) -- GitLab