Skip to content
Snippets Groups Projects
Commit 02516d89 authored by AndiMajore's avatar AndiMajore
Browse files

changed to new node schema

Former-commit-id: 282910642bb91c4db37a0f20283c9af18c874eae [formerly b57d24f8df32f31d06ddee3e87e0fec8c18812d6]
Former-commit-id: b38ca8e6c2532dc4d96a68c6c6af434360428c5b
parent ff8f093b
No related branches found
No related tags found
No related merge requests found
...@@ -17,6 +17,41 @@ class PPIDatasetSerializer(serializers.ModelSerializer): ...@@ -17,6 +17,41 @@ class PPIDatasetSerializer(serializers.ModelSerializer):
model = models.PPIDataset model = models.PPIDataset
fields = '__all__' fields = '__all__'
class ProteinNodeSerializer(serializers.ModelSerializer):
    """Serialize a ``Protein`` so that every identifier field is a list.

    The computed fields below each wrap a single model attribute in a
    one-element list, except ``ensg`` which collects the names of all
    related ENSG entries. ``protein_name`` is not declared here and is
    presumably taken straight from the model by ``ModelSerializer``.
    """

    drugstone_id = serializers.SerializerMethodField()
    uniprot_ac = serializers.SerializerMethodField()
    symbol = serializers.SerializerMethodField()
    ensg = serializers.SerializerMethodField()
    entrez = serializers.SerializerMethodField()

    def get_drugstone_id(self, obj) -> list:
        """Return the node id (``'p'`` followed by ``obj.id``) as a one-element list."""
        return [f'p{obj.id}']

    def get_uniprot_ac(self, obj) -> list:
        """Return ``obj.uniprot_code`` as a one-element list."""
        return [obj.uniprot_code]

    def get_symbol(self, obj) -> list:
        """Return ``obj.gene`` as a one-element list."""
        return [obj.gene]

    def get_entrez(self, obj) -> list:
        """Return ``obj.entrez`` as a one-element list."""
        return [obj.entrez]

    def get_ensg(self, obj) -> list:
        """Return the names of all ENSG entries related to the protein.

        One protein can have several ENSG entries (one-to-many from the
        Protein side), so the result is a list rather than a single value.

        Args:
            obj (Protein): Protein object

        Returns:
            list: ``name`` of every object in ``obj.ensg.all()``
        """
        return [x.name for x in obj.ensg.all()]

    class Meta:
        model = Protein
        fields = ['drugstone_id', 'uniprot_ac', 'symbol', 'protein_name', 'entrez', 'ensg']
class ProteinSerializer(serializers.ModelSerializer): class ProteinSerializer(serializers.ModelSerializer):
drugstone_id = serializers.SerializerMethodField() drugstone_id = serializers.SerializerMethodField()
......
...@@ -19,7 +19,7 @@ def task_update_db_from_nedrex(): ...@@ -19,7 +19,7 @@ def task_update_db_from_nedrex():
if n > 0: if n > 0:
logger.info('Recreating networks...') logger.info('Recreating networks...')
proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs']) proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs'])
out,err = proc.communicate() out, err = proc.communicate()
print(out) print(out)
print(err) print(err)
logger.info('Done.') logger.info('Done.')
import copy
from collections import defaultdict from collections import defaultdict
from typing import List, Tuple, Set, OrderedDict from typing import List, Tuple, Set, OrderedDict
from functools import reduce from functools import reduce
...@@ -22,7 +23,6 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -22,7 +23,6 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
Returns list of serialized protein entries for all matched IDs Returns list of serialized protein entries for all matched IDs
Returns name of backend attribute of Protein table Returns name of backend attribute of Protein table
""" """
# query protein table # query protein table
if identifier == 'symbol': if identifier == 'symbol':
protein_attribute = 'symbol' protein_attribute = 'symbol'
...@@ -32,9 +32,9 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -32,9 +32,9 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg': elif identifier == 'ensg':
protein_attribute = 'ensg' protein_attribute = 'ensg'
node_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter( dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids))))) reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), node_ids) q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez': elif identifier == 'entrez':
protein_attribute = 'entrez' protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids) q_list = map(lambda n: Q(entrez=n), node_ids)
...@@ -45,11 +45,17 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -45,11 +45,17 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
node_objects = Protein.objects.filter(q_list) node_objects = Protein.objects.filter(q_list)
nodes = list() nodes = list()
node_map = defaultdict(list) node_map = defaultdict(list)
if identifier == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects): for node in ProteinSerializer(many=True).to_representation(node_objects):
node_map[node.get(protein_attribute)].append(node) for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids:
node = copy.copy(node)
node[identifier] = ensembl_id
node_map[ensembl_id].append(node)
else:
for node in ProteinSerializer(many=True).to_representation(node_objects):
node_map[node.get(protein_attribute)].append(node)
for node_id, entries in node_map.items(): for node_id, entries in node_map.items():
nodes.append(aggregate_nodes(entries)) nodes.append(aggregate_nodes(entries))
...@@ -60,7 +66,7 @@ def aggregate_nodes(nodes: List[OrderedDict]): ...@@ -60,7 +66,7 @@ def aggregate_nodes(nodes: List[OrderedDict]):
node = defaultdict(set) node = defaultdict(set)
for n in nodes: for n in nodes:
for key, value in n.items(): for key, value in n.items():
if isinstance(value,list): if isinstance(value, list):
for e in value: for e in value:
node[key].add(e) node[key].add(e)
else: else:
......
...@@ -4,6 +4,8 @@ import random ...@@ -4,6 +4,8 @@ import random
import string import string
import time import time
import uuid import uuid
from collections import defaultdict
import pandas as pd import pandas as pd
from typing import Tuple from typing import Tuple
...@@ -58,12 +60,12 @@ def get_pdis_ds(source, licenced): ...@@ -58,12 +60,12 @@ def get_pdis_ds(source, licenced):
def get_drdis_ds(source, licenced): def get_drdis_ds(source, licenced):
try: try:
ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last() ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last()
ds.id ds.id
return ds return ds
except: except:
if licenced: if licenced:
return get_pdis_ds(source, False) return get_drdis_ds(source, False)
return None return None
...@@ -180,6 +182,7 @@ def map_nodes(request) -> Response: ...@@ -180,6 +182,7 @@ def map_nodes(request) -> Response:
# nodes_mapped_dict = {node_id: node for node in nodes_mapped for node_id in node[id_key]} # nodes_mapped_dict = {node_id: node for node in nodes_mapped for node_id in node[id_key]}
# else: # else:
nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped} nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
# merge fetched data with given data to avoid data loss # merge fetched data with given data to avoid data loss
for node in nodes: for node in nodes:
node['drugstoneType'] = 'other' node['drugstoneType'] = 'other'
...@@ -257,74 +260,49 @@ def result_view(request) -> Response: ...@@ -257,74 +260,49 @@ def result_view(request) -> Response:
if not node_attributes: if not node_attributes:
node_attributes = {} node_attributes = {}
result['node_attributes'] = node_attributes result['node_attributes'] = node_attributes
proteins = [] proteins = []
drugs = [] drugs = []
network = result['network'] network = result['network']
node_types = node_attributes.get('node_types') node_types = {}
if not node_types: node_attributes['node_types'] = node_types
node_types = {} is_seed = {}
node_attributes['node_types'] = node_types node_attributes['is_seed'] = is_seed
is_seed = node_attributes.get('is_seed')
if not is_seed:
is_seed = {}
node_attributes['is_seed'] = is_seed
scores = node_attributes.get('scores', {}) scores = node_attributes.get('scores', {})
node_details = {} node_details = {}
protein_id_map = defaultdict(set)
node_attributes['details'] = node_details node_attributes['details'] = node_details
parameters = json.loads(task.parameters) parameters = json.loads(task.parameters)
seeds = parameters['seeds'] seeds = parameters['seeds']
nodes = network['nodes'] nodes = network['nodes']
# edges = network['edges']
for node_id in nodes:
is_seed[node_id] = node_id in seeds
node_type = node_types.get(node_id).lower()
pvd_entity = None
details_s = None
if node_type == 'protein':
pvd_entity = Protein.objects.get(id=int(node_id[1:]))
elif node_type == 'drug':
pvd_entity = Drug.objects.get(id=int(node_id[2:]))
if not node_type or not pvd_entity:
continue
if node_type == 'protein':
details_s = ProteinSerializer().to_representation(pvd_entity)
elif node_type == 'drug':
details_s = DrugSerializer().to_representation(pvd_entity)
node_types[node_id] = node_type
if scores.get(node_id) is not None:
details_s['score'] = scores.get(node_id, None)
node_details[node_id] = details_s
if node_type == 'protein':
proteins.append(details_s)
elif node_type == 'drug':
drugs.append(details_s)
parameters = task_parameters(task) parameters = task_parameters(task)
# attach input parameters to output # attach input parameters to output
result['parameters'] = parameters result['parameters'] = parameters
identifier_nodes = set()
identifier = parameters['config']['identifier']
# TODO move the merging to "scores to result"
# merge input network with result network # merge input network with result network
for node in parameters['input_network']['nodes']: for node in parameters['input_network']['nodes']:
# if node was already mapped, add user defined values to result of analysis # if node was already mapped, add user defined values to result of analysis
if node_name_attribute in node: if identifier in identifier_nodes:
if node[node_name_attribute] in node_details: node_name = node[identifier][0]
if node_name in node_details:
# update the node to not lose user input attributes # update the node to not lose user input attributes
node_details[node[node_name_attribute]].update(node) node_details[node_name].update(node)
# skip adding node if node already exists in analysis output to avoid duplicates # skip adding node if node already exists in analysis output to avoid duplicates
else: else:
# node does not exist in analysis output yet, was added by user but not used as seed # node does not exist in analysis output yet, was added by user but not used as seed
node_details[node[node_name_attribute]] = node node_details[node_name] = node
# append mapped input node to analysis result # append mapped input node to analysis result
nodes.append(node[node_name_attribute]) nodes.append(node_name)
# manually add node to node types # manually add node to node types
result['node_attributes']['node_types'][node[node_name_attribute]] = 'protein' result['node_attributes']['node_types'][node_name] = 'protein'
else: else:
# node is custom node from user, not mapped to drugstone but will be displayed with all custom attributes # node is custom node from user, not mapped to drugstone but will be displayed with all custom attributes
node_id = node['id'] node_id = node['id']
nodes.append(node_id) identifier_nodes.add(node_id)
node_details[node_id] = node node_details[node_id] = node
is_seed[node_id] = False is_seed[node_id] = False
# append custom node to analysis result later on # append custom node to analysis result later on
...@@ -332,15 +310,62 @@ def result_view(request) -> Response: ...@@ -332,15 +310,62 @@ def result_view(request) -> Response:
result['node_attributes']['node_types'][node_id] = 'custom' result['node_attributes']['node_types'][node_id] = 'custom'
# extend the analysis network by the input netword nodes # extend the analysis network by the input netword nodes
# map edge endpoints to database proteins if possible and add edges to analysis network # map edge endpoints to database proteins if possible and add edges to analysis network
identifier = parameters['config']['identifier']
# mapping all new protein and drug nodes by drugstoneIDs + adding scores
for node_id in nodes:
if node_id[0] == 'p':
node_data = ProteinNodeSerializer().to_representation(Protein.objects.get(id=int(node_id[1:])))
# proteins.append(node_data)
node_ident = node_data[identifier][0]
# node_data[identifier] = [node_ident]
protein_id_map[node_ident].add(node_id)
identifier_nodes.add(node_ident)
is_seed[node_ident] = node_id in seeds or (is_seed[node_ident] if node_ident in is_seed else False)
node_types[node_ident] = 'protein'
score = scores.get(node_id, None)
if node_ident in node_details:
data = node_details[node_ident]
data['entrez'].extend(node_data['entrez'])
data['ensg'].extend(node_data['ensg'])
data['symbol'].extend(node_data['symbol'])
data['uniprot_ac'].extend(node_data['uniprot_ac'])
if score:
if 'score' in data:
data['score'].append(score)
else:
data['score'] = [score] if score else []
else:
node_data['score'] = [score] if score else []
node_data['drugstoneType'] = 'protein'
node_data['id'] = node_ident
node_data['label'] = node_ident
node_details[node_ident] = node_data
elif node_id[:2] == 'dr':
node_data = DrugSerializer().to_representation(Drug.objects.get(id=int(node_id[2:])))
drugs.append(node_data)
if node_id in scores:
node_data['score'] = scores.get(node_id, None)
node_types[node_id] = 'drug'
node_details[node_id] = node_data
else:
continue
for node_id, detail in node_details.items():
detail['symbol'] = list(set(detail['symbol']))
detail['entrez'] = list(set(detail['entrez']))
detail['uniprot_ac'] = list(set(detail['uniprot_ac']))
detail['ensg'] = list(set(detail['ensg']))
edges = parameters['input_network']['edges'] edges = parameters['input_network']['edges']
edge_endpoint_ids = set() edge_endpoint_ids = set()
# TODO check for custom edges when working again
for edge in edges: for edge in edges:
edge_endpoint_ids.add(edge['from']) edge_endpoint_ids.add(edge['from'])
edge_endpoint_ids.add(edge['to']) edge_endpoint_ids.add(edge['to'])
# query protein table
nodes_mapped, id_key = query_proteins_by_identifier(edge_endpoint_ids, identifier) nodes_mapped, id_key = query_proteins_by_identifier(edge_endpoint_ids, identifier)
# change data structure to dict in order to be quicker when merging # change data structure to dict in order to be quicker when merging
nodes_mapped_dict = {node[id_key]: node for node in nodes_mapped} nodes_mapped_dict = {node[id_key]: node for node in nodes_mapped}
for edge in edges: for edge in edges:
...@@ -350,8 +375,10 @@ def result_view(request) -> Response: ...@@ -350,8 +375,10 @@ def result_view(request) -> Response:
edge['to'] = nodes_mapped_dict[edge['to']][node_name_attribute] if edge['to'] in nodes_mapped_dict else edge[ edge['to'] = nodes_mapped_dict[edge['to']][node_name_attribute] if edge['to'] in nodes_mapped_dict else edge[
'to'] 'to']
if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']: if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']:
proteins = set(map(lambda n: n[node_name_attribute][1:], proteins = {node_name[1:] for nodes in map(lambda n: n[node_name_attribute],
filter(lambda n: node_name_attribute in n, parameters['input_network']['nodes']))) filter(lambda n: node_name_attribute in n,
parameters['input_network']['nodes'])) for node_name in nodes}
dataset = DEFAULTS['ppi'] if 'interaction_protein_protein' not in parameters['config'] else \ dataset = DEFAULTS['ppi'] if 'interaction_protein_protein' not in parameters['config'] else \
parameters['config'][ parameters['config'][
'interaction_protein_protein'] 'interaction_protein_protein']
...@@ -362,6 +389,9 @@ def result_view(request) -> Response: ...@@ -362,6 +389,9 @@ def result_view(request) -> Response:
map(lambda n: {"from": f'p{n.from_protein_id}', "to": f'p{n.to_protein_id}'}, interaction_objects)) map(lambda n: {"from": f'p{n.from_protein_id}', "to": f'p{n.to_protein_id}'}, interaction_objects))
edges.extend(auto_edges) edges.extend(auto_edges)
result['network']['edges'].extend(edges) result['network']['edges'].extend(edges)
result['network']['nodes'] = list(identifier_nodes)
if 'scores' in result['node_attributes']:
del result['node_attributes']['scores']
if not view: if not view:
return Response(result) return Response(result)
...@@ -375,6 +405,7 @@ def result_view(request) -> Response: ...@@ -375,6 +405,7 @@ def result_view(request) -> Response:
'gene': i['symbol'], 'gene': i['symbol'],
'name': i['protein_name'], 'name': i['protein_name'],
'ensg': i['ensg'], 'ensg': i['ensg'],
'entrez': i['entrez'],
'seed': is_seed[i[node_name_attribute]], 'seed': is_seed[i[node_name_attribute]],
} }
if i.get('score'): if i.get('score'):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment