Skip to content
Snippets Groups Projects
Commit f40b48a2 authored by AndiMajore's avatar AndiMajore
Browse files

changed to new node schema

Former-commit-id: 3afc0390
parent 456b3260
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,41 @@ class PPIDatasetSerializer(serializers.ModelSerializer):
model = models.PPIDataset
fields = '__all__'
class ProteinNodeSerializer(serializers.ModelSerializer):
drugstone_id = serializers.SerializerMethodField()
uniprot_ac = serializers.SerializerMethodField()
symbol = serializers.SerializerMethodField()
ensg = serializers.SerializerMethodField()
entrez = serializers.SerializerMethodField()
def get_drugstone_id(self, obj):
return [f'p{obj.id}']
def get_uniprot_ac(self, obj):
return [obj.uniprot_code]
def get_symbol(self, obj):
return [obj.gene]
def get_entrez(self,obj):
return [obj.entrez]
def get_ensg(self, obj) -> str:
"""Since ENSG has a many to one relationship to the Protein table,
return a list of all matching ensg names.
Args:
obj (Protein): Protein object
Returns:
str: list of all matching ENSG numbers
"""
return [x.name for x in obj.ensg.all()]
class Meta:
model = Protein
fields = ['drugstone_id', 'uniprot_ac', 'symbol', 'protein_name', 'entrez', 'ensg']
class ProteinSerializer(serializers.ModelSerializer):
drugstone_id = serializers.SerializerMethodField()
......
......@@ -19,7 +19,7 @@ def task_update_db_from_nedrex():
if n > 0:
logger.info('Recreating networks...')
proc = subprocess.Popen(['python3', '/usr/src/drugstone/manage.py', 'make_graphs'])
out,err = proc.communicate()
out, err = proc.communicate()
print(out)
print(err)
logger.info('Done.')
import copy
from collections import defaultdict
from typing import List, Tuple, Set, OrderedDict
from functools import reduce
......@@ -22,7 +23,6 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
Returns list of serialized protein entries for all matched IDs
Returns name of backend attribute of Protein table
"""
# query protein table
if identifier == 'symbol':
protein_attribute = 'symbol'
......@@ -32,9 +32,9 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg':
protein_attribute = 'ensg'
node_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), node_ids)
q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez':
protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids)
......@@ -45,11 +45,17 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
node_objects = Protein.objects.filter(q_list)
nodes = list()
node_map = defaultdict(list)
for node in ProteinSerializer(many=True).to_representation(node_objects):
node_map[node.get(protein_attribute)].append(node)
if identifier == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects):
for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids:
node = copy.copy(node)
node[identifier] = ensembl_id
node_map[ensembl_id].append(node)
else:
for node in ProteinSerializer(many=True).to_representation(node_objects):
node_map[node.get(protein_attribute)].append(node)
for node_id, entries in node_map.items():
nodes.append(aggregate_nodes(entries))
......@@ -60,7 +66,7 @@ def aggregate_nodes(nodes: List[OrderedDict]):
node = defaultdict(set)
for n in nodes:
for key, value in n.items():
if isinstance(value,list):
if isinstance(value, list):
for e in value:
node[key].add(e)
else:
......
......@@ -4,6 +4,8 @@ import random
import string
import time
import uuid
from collections import defaultdict
import pandas as pd
from typing import Tuple
......@@ -58,12 +60,12 @@ def get_pdis_ds(source, licenced):
def get_drdis_ds(source, licenced):
try:
ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last()
ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last()
ds.id
return ds
except:
if licenced:
return get_pdis_ds(source, False)
return get_drdis_ds(source, False)
return None
......@@ -180,6 +182,7 @@ def map_nodes(request) -> Response:
# nodes_mapped_dict = {node_id: node for node in nodes_mapped for node_id in node[id_key]}
# else:
nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
# merge fetched data with given data to avoid data loss
for node in nodes:
node['drugstoneType'] = 'other'
......@@ -257,74 +260,49 @@ def result_view(request) -> Response:
if not node_attributes:
node_attributes = {}
result['node_attributes'] = node_attributes
proteins = []
drugs = []
network = result['network']
node_types = node_attributes.get('node_types')
if not node_types:
node_types = {}
node_attributes['node_types'] = node_types
is_seed = node_attributes.get('is_seed')
if not is_seed:
is_seed = {}
node_attributes['is_seed'] = is_seed
node_types = {}
node_attributes['node_types'] = node_types
is_seed = {}
node_attributes['is_seed'] = is_seed
scores = node_attributes.get('scores', {})
node_details = {}
protein_id_map = defaultdict(set)
node_attributes['details'] = node_details
parameters = json.loads(task.parameters)
seeds = parameters['seeds']
nodes = network['nodes']
# edges = network['edges']
for node_id in nodes:
is_seed[node_id] = node_id in seeds
node_type = node_types.get(node_id).lower()
pvd_entity = None
details_s = None
if node_type == 'protein':
pvd_entity = Protein.objects.get(id=int(node_id[1:]))
elif node_type == 'drug':
pvd_entity = Drug.objects.get(id=int(node_id[2:]))
if not node_type or not pvd_entity:
continue
if node_type == 'protein':
details_s = ProteinSerializer().to_representation(pvd_entity)
elif node_type == 'drug':
details_s = DrugSerializer().to_representation(pvd_entity)
node_types[node_id] = node_type
if scores.get(node_id) is not None:
details_s['score'] = scores.get(node_id, None)
node_details[node_id] = details_s
if node_type == 'protein':
proteins.append(details_s)
elif node_type == 'drug':
drugs.append(details_s)
parameters = task_parameters(task)
# attach input parameters to output
result['parameters'] = parameters
identifier_nodes = set()
identifier = parameters['config']['identifier']
# TODO move the merging to "scores to result"
# merge input network with result network
for node in parameters['input_network']['nodes']:
# if node was already mapped, add user defined values to result of analysis
if node_name_attribute in node:
if node[node_name_attribute] in node_details:
if identifier in identifier_nodes:
node_name = node[identifier][0]
if node_name in node_details:
# update the node to not lose user input attributes
node_details[node[node_name_attribute]].update(node)
node_details[node_name].update(node)
# skip adding node if node already exists in analysis output to avoid duplicates
else:
# node does not exist in analysis output yet, was added by user but not used as seed
node_details[node[node_name_attribute]] = node
node_details[node_name] = node
# append mapped input node to analysis result
nodes.append(node[node_name_attribute])
nodes.append(node_name)
# manually add node to node types
result['node_attributes']['node_types'][node[node_name_attribute]] = 'protein'
result['node_attributes']['node_types'][node_name] = 'protein'
else:
# node is custom node from user, not mapped to drugstone but will be displayed with all custom attributes
node_id = node['id']
nodes.append(node_id)
identifier_nodes.add(node_id)
node_details[node_id] = node
is_seed[node_id] = False
# append custom node to analysis result later on
......@@ -332,15 +310,62 @@ def result_view(request) -> Response:
result['node_attributes']['node_types'][node_id] = 'custom'
# extend the analysis network by the input netword nodes
# map edge endpoints to database proteins if possible and add edges to analysis network
identifier = parameters['config']['identifier']
# mapping all new protein and drug nodes by drugstoneIDs + adding scores
for node_id in nodes:
if node_id[0] == 'p':
node_data = ProteinNodeSerializer().to_representation(Protein.objects.get(id=int(node_id[1:])))
# proteins.append(node_data)
node_ident = node_data[identifier][0]
# node_data[identifier] = [node_ident]
protein_id_map[node_ident].add(node_id)
identifier_nodes.add(node_ident)
is_seed[node_ident] = node_id in seeds or (is_seed[node_ident] if node_ident in is_seed else False)
node_types[node_ident] = 'protein'
score = scores.get(node_id, None)
if node_ident in node_details:
data = node_details[node_ident]
data['entrez'].extend(node_data['entrez'])
data['ensg'].extend(node_data['ensg'])
data['symbol'].extend(node_data['symbol'])
data['uniprot_ac'].extend(node_data['uniprot_ac'])
if score:
if 'score' in data:
data['score'].append(score)
else:
data['score'] = [score] if score else []
else:
node_data['score'] = [score] if score else []
node_data['drugstoneType'] = 'protein'
node_data['id'] = node_ident
node_data['label'] = node_ident
node_details[node_ident] = node_data
elif node_id[:2] == 'dr':
node_data = DrugSerializer().to_representation(Drug.objects.get(id=int(node_id[2:])))
drugs.append(node_data)
if node_id in scores:
node_data['score'] = scores.get(node_id, None)
node_types[node_id] = 'drug'
node_details[node_id] = node_data
else:
continue
for node_id, detail in node_details.items():
detail['symbol'] = list(set(detail['symbol']))
detail['entrez'] = list(set(detail['entrez']))
detail['uniprot_ac'] = list(set(detail['uniprot_ac']))
detail['ensg'] = list(set(detail['ensg']))
edges = parameters['input_network']['edges']
edge_endpoint_ids = set()
# TODO check for custom edges when working again
for edge in edges:
edge_endpoint_ids.add(edge['from'])
edge_endpoint_ids.add(edge['to'])
# query protein table
nodes_mapped, id_key = query_proteins_by_identifier(edge_endpoint_ids, identifier)
# change data structure to dict in order to be quicker when merging
nodes_mapped_dict = {node[id_key]: node for node in nodes_mapped}
for edge in edges:
......@@ -350,8 +375,10 @@ def result_view(request) -> Response:
edge['to'] = nodes_mapped_dict[edge['to']][node_name_attribute] if edge['to'] in nodes_mapped_dict else edge[
'to']
if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']:
proteins = set(map(lambda n: n[node_name_attribute][1:],
filter(lambda n: node_name_attribute in n, parameters['input_network']['nodes'])))
proteins = {node_name[1:] for nodes in map(lambda n: n[node_name_attribute],
filter(lambda n: node_name_attribute in n,
parameters['input_network']['nodes'])) for node_name in nodes}
dataset = DEFAULTS['ppi'] if 'interaction_protein_protein' not in parameters['config'] else \
parameters['config'][
'interaction_protein_protein']
......@@ -362,6 +389,9 @@ def result_view(request) -> Response:
map(lambda n: {"from": f'p{n.from_protein_id}', "to": f'p{n.to_protein_id}'}, interaction_objects))
edges.extend(auto_edges)
result['network']['edges'].extend(edges)
result['network']['nodes'] = list(identifier_nodes)
if 'scores' in result['node_attributes']:
del result['node_attributes']['scores']
if not view:
return Response(result)
......@@ -375,6 +405,7 @@ def result_view(request) -> Response:
'gene': i['symbol'],
'name': i['protein_name'],
'ensg': i['ensg'],
'entrez': i['entrez'],
'seed': is_seed[i[node_name_attribute]],
}
if i.get('score'):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment