Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
backend
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Cosy-Bio
Drugst.One
backend
Commits
f40b48a2
Commit
f40b48a2
authored
2 years ago
by
AndiMajore
Browse files
Options
Downloads
Patches
Plain Diff
changed to new node schema
Former-commit-id:
3afc0390
parent
456b3260
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
drugstone/serializers.py
+35
-0
35 additions, 0 deletions
drugstone/serializers.py
drugstone/tasks.py
+1
-1
1 addition, 1 deletion
drugstone/tasks.py
drugstone/util/query_db.py
+14
-8
14 additions, 8 deletions
drugstone/util/query_db.py
drugstone/views.py
+79
-48
79 additions, 48 deletions
drugstone/views.py
with
129 additions
and
57 deletions
drugstone/serializers.py
+
35
−
0
View file @
f40b48a2
...
...
@@ -17,6 +17,41 @@ class PPIDatasetSerializer(serializers.ModelSerializer):
model
=
models
.
PPIDataset
fields
=
'
__all__
'
class ProteinNodeSerializer(serializers.ModelSerializer):
    """Serialize a ``Protein`` using the list-valued node schema.

    Every identifier field computed here (``drugstone_id``, ``uniprot_ac``,
    ``symbol``, ``entrez``, ``ensg``) is emitted as a list, even when it
    holds a single value. ``protein_name`` is not overridden and is
    serialized straight from the model field.
    """

    drugstone_id = serializers.SerializerMethodField()
    uniprot_ac = serializers.SerializerMethodField()
    symbol = serializers.SerializerMethodField()
    ensg = serializers.SerializerMethodField()
    entrez = serializers.SerializerMethodField()

    def get_drugstone_id(self, obj) -> list:
        """Return the drugstone node id (``'p'`` + primary key) as a one-element list."""
        return [f'p{obj.id}']

    def get_uniprot_ac(self, obj) -> list:
        """Return the protein's ``uniprot_code`` as a one-element list."""
        return [obj.uniprot_code]

    def get_symbol(self, obj) -> list:
        """Return the protein's ``gene`` attribute as a one-element list."""
        return [obj.gene]

    def get_entrez(self, obj) -> list:
        """Return the protein's ``entrez`` attribute as a one-element list."""
        return [obj.entrez]

    def get_ensg(self, obj) -> list:
        """
        Since ENSG has a many to one relationship to the Protein table,
        return a list of all matching ensg names.

        Args:
            obj (Protein): Protein object

        Returns:
            list: names of all ENSG entries related to ``obj``
        """
        # obj.ensg is a related manager; each related row exposes ``name``.
        return [x.name for x in obj.ensg.all()]

    class Meta:
        model = Protein
        fields = [
            'drugstone_id',
            'uniprot_ac',
            'symbol',
            'protein_name',
            'entrez',
            'ensg',
        ]
class
ProteinSerializer
(
serializers
.
ModelSerializer
):
drugstone_id
=
serializers
.
SerializerMethodField
()
...
...
This diff is collapsed.
Click to expand it.
drugstone/tasks.py
+
1
−
1
View file @
f40b48a2
...
...
@@ -19,7 +19,7 @@ def task_update_db_from_nedrex():
if
n
>
0
:
logger
.
info
(
'
Recreating networks...
'
)
proc
=
subprocess
.
Popen
([
'
python3
'
,
'
/usr/src/drugstone/manage.py
'
,
'
make_graphs
'
])
out
,
err
=
proc
.
communicate
()
out
,
err
=
proc
.
communicate
()
print
(
out
)
print
(
err
)
logger
.
info
(
'
Done.
'
)
This diff is collapsed.
Click to expand it.
drugstone/util/query_db.py
+
14
−
8
View file @
f40b48a2
import
copy
from
collections
import
defaultdict
from
typing
import
List
,
Tuple
,
Set
,
OrderedDict
from
functools
import
reduce
...
...
@@ -22,7 +23,6 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
Returns list of serialized protein entries for all matched IDs
Returns name of backend attribute of Protein table
"""
# query protein table
if
identifier
==
'
symbol
'
:
protein_attribute
=
'
symbol
'
...
...
@@ -32,9 +32,9 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
q_list
=
map
(
lambda
n
:
Q
(
uniprot_code__iexact
=
n
),
node_ids
)
elif
identifier
==
'
ensg
'
:
protein_attribute
=
'
ensg
'
node
_ids
=
map
(
lambda
n
:
n
.
protein_id
,
EnsemblGene
.
objects
.
filter
(
dr
_ids
=
map
(
lambda
n
:
n
.
protein_id
,
EnsemblGene
.
objects
.
filter
(
reduce
(
lambda
a
,
b
:
a
|
b
,
map
(
lambda
n
:
Q
(
name__iexact
=
n
),
list
(
node_ids
)))))
q_list
=
map
(
lambda
n
:
Q
(
id
=
n
),
node
_ids
)
q_list
=
map
(
lambda
n
:
Q
(
id
=
n
),
dr
_ids
)
elif
identifier
==
'
entrez
'
:
protein_attribute
=
'
entrez
'
q_list
=
map
(
lambda
n
:
Q
(
entrez
=
n
),
node_ids
)
...
...
@@ -45,11 +45,17 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
node_objects
=
Protein
.
objects
.
filter
(
q_list
)
nodes
=
list
()
node_map
=
defaultdict
(
list
)
for
node
in
ProteinSerializer
(
many
=
True
).
to_representation
(
node_objects
):
node_map
[
node
.
get
(
protein_attribute
)].
append
(
node
)
if
identifier
==
'
ensg
'
:
for
node
in
ProteinSerializer
(
many
=
True
).
to_representation
(
node_objects
):
for
ensembl_id
in
node
.
get
(
protein_attribute
):
if
ensembl_id
.
upper
()
in
node_ids
:
node
=
copy
.
copy
(
node
)
node
[
identifier
]
=
ensembl_id
node_map
[
ensembl_id
].
append
(
node
)
else
:
for
node
in
ProteinSerializer
(
many
=
True
).
to_representation
(
node_objects
):
node_map
[
node
.
get
(
protein_attribute
)].
append
(
node
)
for
node_id
,
entries
in
node_map
.
items
():
nodes
.
append
(
aggregate_nodes
(
entries
))
...
...
@@ -60,7 +66,7 @@ def aggregate_nodes(nodes: List[OrderedDict]):
node
=
defaultdict
(
set
)
for
n
in
nodes
:
for
key
,
value
in
n
.
items
():
if
isinstance
(
value
,
list
):
if
isinstance
(
value
,
list
):
for
e
in
value
:
node
[
key
].
add
(
e
)
else
:
...
...
This diff is collapsed.
Click to expand it.
drugstone/views.py
+
79
−
48
View file @
f40b48a2
...
...
@@ -4,6 +4,8 @@ import random
import
string
import
time
import
uuid
from
collections
import
defaultdict
import
pandas
as
pd
from
typing
import
Tuple
...
...
@@ -58,12 +60,12 @@ def get_pdis_ds(source, licenced):
def
get_drdis_ds
(
source
,
licenced
):
try
:
ds
=
models
.
P
Di
s
Dataset
.
objects
.
filter
(
name__iexact
=
source
,
licenced
=
licenced
).
last
()
ds
=
models
.
Dr
DiDataset
.
objects
.
filter
(
name__iexact
=
source
,
licenced
=
licenced
).
last
()
ds
.
id
return
ds
except
:
if
licenced
:
return
get_
p
dis_ds
(
source
,
False
)
return
get_
dr
dis_ds
(
source
,
False
)
return
None
...
...
@@ -180,6 +182,7 @@ def map_nodes(request) -> Response:
# nodes_mapped_dict = {node_id: node for node in nodes_mapped for node_id in node[id_key]}
# else:
nodes_mapped_dict
=
{
node
[
id_key
][
0
]:
node
for
node
in
nodes_mapped
}
# merge fetched data with given data to avoid data loss
for
node
in
nodes
:
node
[
'
drugstoneType
'
]
=
'
other
'
...
...
@@ -257,74 +260,49 @@ def result_view(request) -> Response:
if
not
node_attributes
:
node_attributes
=
{}
result
[
'
node_attributes
'
]
=
node_attributes
proteins
=
[]
drugs
=
[]
network
=
result
[
'
network
'
]
node_types
=
node_attributes
.
get
(
'
node_types
'
)
if
not
node_types
:
node_types
=
{}
node_attributes
[
'
node_types
'
]
=
node_types
is_seed
=
node_attributes
.
get
(
'
is_seed
'
)
if
not
is_seed
:
is_seed
=
{}
node_attributes
[
'
is_seed
'
]
=
is_seed
node_types
=
{}
node_attributes
[
'
node_types
'
]
=
node_types
is_seed
=
{}
node_attributes
[
'
is_seed
'
]
=
is_seed
scores
=
node_attributes
.
get
(
'
scores
'
,
{})
node_details
=
{}
protein_id_map
=
defaultdict
(
set
)
node_attributes
[
'
details
'
]
=
node_details
parameters
=
json
.
loads
(
task
.
parameters
)
seeds
=
parameters
[
'
seeds
'
]
nodes
=
network
[
'
nodes
'
]
# edges = network['edges']
for
node_id
in
nodes
:
is_seed
[
node_id
]
=
node_id
in
seeds
node_type
=
node_types
.
get
(
node_id
).
lower
()
pvd_entity
=
None
details_s
=
None
if
node_type
==
'
protein
'
:
pvd_entity
=
Protein
.
objects
.
get
(
id
=
int
(
node_id
[
1
:]))
elif
node_type
==
'
drug
'
:
pvd_entity
=
Drug
.
objects
.
get
(
id
=
int
(
node_id
[
2
:]))
if
not
node_type
or
not
pvd_entity
:
continue
if
node_type
==
'
protein
'
:
details_s
=
ProteinSerializer
().
to_representation
(
pvd_entity
)
elif
node_type
==
'
drug
'
:
details_s
=
DrugSerializer
().
to_representation
(
pvd_entity
)
node_types
[
node_id
]
=
node_type
if
scores
.
get
(
node_id
)
is
not
None
:
details_s
[
'
score
'
]
=
scores
.
get
(
node_id
,
None
)
node_details
[
node_id
]
=
details_s
if
node_type
==
'
protein
'
:
proteins
.
append
(
details_s
)
elif
node_type
==
'
drug
'
:
drugs
.
append
(
details_s
)
parameters
=
task_parameters
(
task
)
# attach input parameters to output
result
[
'
parameters
'
]
=
parameters
identifier_nodes
=
set
()
identifier
=
parameters
[
'
config
'
][
'
identifier
'
]
# TODO move the merging to "scores to result"
# merge input network with result network
for
node
in
parameters
[
'
input_network
'
][
'
nodes
'
]:
# if node was already mapped, add user defined values to result of analysis
if
node_name_attribute
in
node
:
if
node
[
node_name_attribute
]
in
node_details
:
if
identifier
in
identifier_nodes
:
node_name
=
node
[
identifier
][
0
]
if
node_name
in
node_details
:
# update the node to not lose user input attributes
node_details
[
node
[
node_name_attribute
]
].
update
(
node
)
node_details
[
node
_name
].
update
(
node
)
# skip adding node if node already exists in analysis output to avoid duplicates
else
:
# node does not exist in analysis output yet, was added by user but not used as seed
node_details
[
node
[
node_name_attribute
]
]
=
node
node_details
[
node
_name
]
=
node
# append mapped input node to analysis result
nodes
.
append
(
node
[
node_name_attribute
]
)
nodes
.
append
(
node
_name
)
# manually add node to node types
result
[
'
node_attributes
'
][
'
node_types
'
][
node
[
node_name_attribute
]
]
=
'
protein
'
result
[
'
node_attributes
'
][
'
node_types
'
][
node
_name
]
=
'
protein
'
else
:
# node is custom node from user, not mapped to drugstone but will be displayed with all custom attributes
node_id
=
node
[
'
id
'
]
nodes
.
a
ppen
d
(
node_id
)
identifier_
nodes
.
a
d
d
(
node_id
)
node_details
[
node_id
]
=
node
is_seed
[
node_id
]
=
False
# append custom node to analysis result later on
...
...
@@ -332,15 +310,62 @@ def result_view(request) -> Response:
result
[
'
node_attributes
'
][
'
node_types
'
][
node_id
]
=
'
custom
'
# extend the analysis network by the input network nodes
# map edge endpoints to database proteins if possible and add edges to analysis network
identifier
=
parameters
[
'
config
'
][
'
identifier
'
]
# mapping all new protein and drug nodes by drugstoneIDs + adding scores
for
node_id
in
nodes
:
if
node_id
[
0
]
==
'
p
'
:
node_data
=
ProteinNodeSerializer
().
to_representation
(
Protein
.
objects
.
get
(
id
=
int
(
node_id
[
1
:])))
# proteins.append(node_data)
node_ident
=
node_data
[
identifier
][
0
]
# node_data[identifier] = [node_ident]
protein_id_map
[
node_ident
].
add
(
node_id
)
identifier_nodes
.
add
(
node_ident
)
is_seed
[
node_ident
]
=
node_id
in
seeds
or
(
is_seed
[
node_ident
]
if
node_ident
in
is_seed
else
False
)
node_types
[
node_ident
]
=
'
protein
'
score
=
scores
.
get
(
node_id
,
None
)
if
node_ident
in
node_details
:
data
=
node_details
[
node_ident
]
data
[
'
entrez
'
].
extend
(
node_data
[
'
entrez
'
])
data
[
'
ensg
'
].
extend
(
node_data
[
'
ensg
'
])
data
[
'
symbol
'
].
extend
(
node_data
[
'
symbol
'
])
data
[
'
uniprot_ac
'
].
extend
(
node_data
[
'
uniprot_ac
'
])
if
score
:
if
'
score
'
in
data
:
data
[
'
score
'
].
append
(
score
)
else
:
data
[
'
score
'
]
=
[
score
]
if
score
else
[]
else
:
node_data
[
'
score
'
]
=
[
score
]
if
score
else
[]
node_data
[
'
drugstoneType
'
]
=
'
protein
'
node_data
[
'
id
'
]
=
node_ident
node_data
[
'
label
'
]
=
node_ident
node_details
[
node_ident
]
=
node_data
elif
node_id
[:
2
]
==
'
dr
'
:
node_data
=
DrugSerializer
().
to_representation
(
Drug
.
objects
.
get
(
id
=
int
(
node_id
[
2
:])))
drugs
.
append
(
node_data
)
if
node_id
in
scores
:
node_data
[
'
score
'
]
=
scores
.
get
(
node_id
,
None
)
node_types
[
node_id
]
=
'
drug
'
node_details
[
node_id
]
=
node_data
else
:
continue
for
node_id
,
detail
in
node_details
.
items
():
detail
[
'
symbol
'
]
=
list
(
set
(
detail
[
'
symbol
'
]))
detail
[
'
entrez
'
]
=
list
(
set
(
detail
[
'
entrez
'
]))
detail
[
'
uniprot_ac
'
]
=
list
(
set
(
detail
[
'
uniprot_ac
'
]))
detail
[
'
ensg
'
]
=
list
(
set
(
detail
[
'
ensg
'
]))
edges
=
parameters
[
'
input_network
'
][
'
edges
'
]
edge_endpoint_ids
=
set
()
# TODO check for custom edges when working again
for
edge
in
edges
:
edge_endpoint_ids
.
add
(
edge
[
'
from
'
])
edge_endpoint_ids
.
add
(
edge
[
'
to
'
])
# query protein table
nodes_mapped
,
id_key
=
query_proteins_by_identifier
(
edge_endpoint_ids
,
identifier
)
# change data structure to dict in order to be quicker when merging
nodes_mapped_dict
=
{
node
[
id_key
]:
node
for
node
in
nodes_mapped
}
for
edge
in
edges
:
...
...
@@ -350,8 +375,10 @@ def result_view(request) -> Response:
edge
[
'
to
'
]
=
nodes_mapped_dict
[
edge
[
'
to
'
]][
node_name_attribute
]
if
edge
[
'
to
'
]
in
nodes_mapped_dict
else
edge
[
'
to
'
]
if
'
autofill_edges
'
in
parameters
[
'
config
'
]
and
parameters
[
'
config
'
][
'
autofill_edges
'
]:
proteins
=
set
(
map
(
lambda
n
:
n
[
node_name_attribute
][
1
:],
filter
(
lambda
n
:
node_name_attribute
in
n
,
parameters
[
'
input_network
'
][
'
nodes
'
])))
proteins
=
{
node_name
[
1
:]
for
nodes
in
map
(
lambda
n
:
n
[
node_name_attribute
],
filter
(
lambda
n
:
node_name_attribute
in
n
,
parameters
[
'
input_network
'
][
'
nodes
'
]))
for
node_name
in
nodes
}
dataset
=
DEFAULTS
[
'
ppi
'
]
if
'
interaction_protein_protein
'
not
in
parameters
[
'
config
'
]
else
\
parameters
[
'
config
'
][
'
interaction_protein_protein
'
]
...
...
@@ -362,6 +389,9 @@ def result_view(request) -> Response:
map
(
lambda
n
:
{
"
from
"
:
f
'
p
{
n
.
from_protein_id
}
'
,
"
to
"
:
f
'
p
{
n
.
to_protein_id
}
'
},
interaction_objects
))
edges
.
extend
(
auto_edges
)
result
[
'
network
'
][
'
edges
'
].
extend
(
edges
)
result
[
'
network
'
][
'
nodes
'
]
=
list
(
identifier_nodes
)
if
'
scores
'
in
result
[
'
node_attributes
'
]:
del
result
[
'
node_attributes
'
][
'
scores
'
]
if
not
view
:
return
Response
(
result
)
...
...
@@ -375,6 +405,7 @@ def result_view(request) -> Response:
'
gene
'
:
i
[
'
symbol
'
],
'
name
'
:
i
[
'
protein_name
'
],
'
ensg
'
:
i
[
'
ensg
'
],
'
entrez
'
:
i
[
'
entrez
'
],
'
seed
'
:
is_seed
[
i
[
node_name_attribute
]],
}
if
i
.
get
(
'
score
'
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment