Fixed ensg mode; fixed autofill edges in the result view; fixed PPI/PDI dataset pairing during network generation

Former-commit-id: dc5998e5

Fixed ensg mode; fixed autofill edges in the result view; fixed PPI/PDI dataset pairing during network generation
0dcd4b69 · AndiMajore · 2d03c068 · 0dcd4b69 · 0dcd4b69 · 0dcd4b69
Commit 0dcd4b69 authored 2 years ago by AndiMajore
--- a/drugstone/management/commands/make_graphs.py
+++ b/drugstone/management/commands/make_graphs.py
@@ -242,18 +242,20 @@ class Command(BaseCommand):
        parameter_combinations = []
        for protein_interaction_dataset in ppi_datasets:
            for pdi_dataset in pdi_datasets:
-                licenced = protein_interaction_dataset.licenced or pdi_dataset.licenced
+                ppi_ds = protein_interaction_dataset
+                pdi_ds = pdi_dataset
+                licenced = ppi_ds.licenced or pdi_ds.licenced
                if licenced:
-                    protein_interaction_dataset = licenced_ppi_dataset[
-                        protein_interaction_dataset.name] if protein_interaction_dataset.name in licenced_ppi_dataset else protein_interaction_dataset
-                    pdi_dataset = licenced_pdi_dataset[
-                        pdi_dataset.name] if pdi_dataset.name in licenced_pdi_dataset else pdi_dataset
-                hash = f'{protein_interaction_dataset.name}-{pdi_dataset.name}_{licenced}'
+                    ppi_ds = licenced_ppi_dataset[
+                        ppi_ds.name] if protein_interaction_dataset.name in licenced_ppi_dataset else ppi_ds
+                    pdi_ds = licenced_pdi_dataset[
+                        pdi_ds.name] if pdi_ds.name in licenced_pdi_dataset else pdi_ds
+                hash = f'{ppi_ds.name}-{pdi_ds.name}_{licenced}'
                if hash in uniq_combis:
                    continue
                uniq_combis.add(hash)
                for identifier in ['ensg', 'symbol', 'ensembl', 'uniprot']:
-                    parameter_combinations.append([protein_interaction_dataset, pdi_dataset, identifier])
+                    parameter_combinations.append([ppi_ds, pdi_ds, identifier])
        # close all database connections so subprocesses will create their own connections
        # this prevents the processes from running into problems because of using the same connection
        db.connections.close_all()

--- a/drugstone/util/query_db.py
+++ b/drugstone/util/query_db.py
@@ -24,6 +24,8 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
            Returns name of backend attribute of Protein table
    """
    # query protein table
+    if(len(node_ids) == 0):
+        return list(), identifier
    if identifier == 'symbol':
        protein_attribute = 'symbol'
        q_list = map(lambda n: Q(gene__iexact=n), node_ids)

--- a/drugstone/views.py
+++ b/drugstone/views.py
@@ -377,27 +377,13 @@ def result_view(request) -> Response:

    nodes_mapped, id_key = query_proteins_by_identifier(edge_endpoint_ids, identifier)

-    # change data structure to dict in order to be quicker when merging
-    print(nodes_mapped)
-    # nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
-    # nodes_mapped_dict_reverse = {}
-    # for id, node in nodes_mapped_dict.items():
-    #     for drugstone_id in node[node_name_attribute]:
-    #         nodes_mapped_dict_reverse[node[drugstone_id]] = id
-
-    print(nodes)
-    # for edge in edges:
-    #     # change edge endpoints if they were matched with a protein in the database
-    #     edge['from'] = nodes_mapped_dict[edge['from']][node_name_attribute] if edge['from'] in nodes_mapped_dict else \
-    #         edge['from']
-    #     edge['to'] = nodes_mapped_dict[edge['to']][node_name_attribute] if edge['to'] in nodes_mapped_dict else edge[
-    #         'to']

+    if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']:

+        prots = list(filter(lambda n: n['drugstone_type'] == 'protein',
+                filter(lambda n: 'drugstone_type' in n and node_name_attribute in n, parameters['input_network']['nodes'])))

-    if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']:
-        proteins = {node_name[1:] for nodes in map(lambda n: 'drugstoneType' in n and n[node_name_attribute],
-                                                   filter(lambda n: n.drugstoneType == 'protein' ,filter(lambda n: 'drugstoneType' in n and node_name_attribute in n ,parameters['input_network']['nodes']))) for node_name in nodes}
+        proteins = {node_name[1:] for node in prots for node_name in node[node_name_attribute]}
        dataset = DEFAULTS['ppi'] if 'interaction_protein_protein' not in parameters['config'] else \
            parameters['config'][
                'interaction_protein_protein']
@@ -407,18 +393,15 @@ def result_view(request) -> Response:
        auto_edges = list(map(lambda n: {"from": f'p{n.from_protein_id}', "to": f'p{n.to_protein_id}'},
                              interaction_objects))
        edges.extend(auto_edges)
-        # TODO check what to do with edges with from and to id lists
-


    result['network']['edges'].extend(edges)
    uniq_edges = dict()
    for edge in result['network']['edges']:
-        print(edge)
        hash = edge['from'] + edge['to']
        uniq_edges[hash] = edge
    result['network']['edges'] = list(uniq_edges.values())
-    # result['network']['nodes'] = list(identifier_nodes)
+
    if 'scores' in result['node_attributes']:
        del result['node_attributes']['scores']

@@ -671,9 +654,6 @@ class TissueExpressionView(APIView):
            for node in nodes + seeds:
                node_type = node_types.get(node)
                details = None
-                # if not node_type:
-                #     print('we should not see this 3')
-                #     node_type, details = infer_node_type_and_details(node)
                if node_type == 'protein':
                    if details:
                        proteins.append(details)