Commit 684829af authored by Michael Hartung

Merge branch 'development' of gitlab.rrz.uni-hamburg.de:cosy-bio/drugst.one/backend into development
parents 7870acc9 450da546
Related merge request: !3 (Development)
Showing changed files with 280 additions and 174 deletions
FROM registry.blitzhub.io/conda_miniconda3
FROM andimajore/miniconda3_kinetic
WORKDIR /usr/src/drugstone/
@@ -24,8 +24,3 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
RUN pip install nedrex
COPY . /usr/src/drugstone/
#EXPOSE 8000
# ENTRYPOINT ["sh", "/entrypoint.sh"]
@@ -83,15 +83,15 @@ services:
- db
networks:
- drugstone_net
flower:
image: mher/flower
container_name: drugstone_flower
env_file:
- './docker-django.env.dev'
ports:
- 8888:8888
networks:
- drugstone_net
# flower:
# image: mher/flower
# container_name: drugstone_flower
# env_file:
# - './docker-django.env.dev'
# ports:
# - 8888:8888
# networks:
# - drugstone_net
networks:
drugstone_net:
......
@@ -13,3 +13,4 @@ REDIS_PORT=6379
GT_THREADS=16
DJANGO_SETTINGS_MODULE=drugstone.settings
CELERY_BROKER_URL=redis://redis:6379/0
DB_UPDATE_ON_START=0
\ No newline at end of file
@@ -15,3 +15,4 @@ CELERY_BROKER_URL=redis://redis:6379/0
FLOWER_PORT=8888
FLOWER_BASIC_AUTH=drugstone:test
GT_THREADS=2
DB_UPDATE_ON_START=0
\ No newline at end of file
@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
decode_responses=False)
rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r)
r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
port=os.getenv('REDIS_PORT', 6379),
db=0,
decode_responses=True)
identifier_map = {
'ensembl': 'ensg',
'ncbigene': 'entrez'
}
def run_task(token, algorithm, parameters):
def set_progress(progress, status):
@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters):
r.set(f'{token}_job_id', f'{job_id}')
r.set(f'{token}_started_at', str(datetime.now().timestamp()))
task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result)
params = json.loads(parameters)
params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier'])
task_hook = TaskHook(params, './data/Networks/', set_progress, set_result)
task_hook.parameters["config"].get("identifier", "symbol")
try:
if algorithm == 'dummy':
......
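For reference, the effect of the new identifier_map lookup in run_task: frontend aliases are rewritten to the backend's canonical names before the TaskHook is constructed, and unknown values pass through unchanged. A minimal, self-contained sketch of that behavior (the map is copied from the hunk above; the wrapper function is illustrative only):

import json

identifier_map = {
    'ensembl': 'ensg',
    'ncbigene': 'entrez'
}

def normalize_parameters(parameters: str) -> dict:
    # Mirrors the run_task change: decode the JSON once, then canonicalize
    # the identifier, falling back to the original value if it has no alias.
    params = json.loads(parameters)
    ident = params['config']['identifier']
    params['config']['identifier'] = identifier_map.get(ident, ident)
    return params

assert normalize_parameters('{"config": {"identifier": "ncbigene"}}')['config']['identifier'] == 'entrez'
assert normalize_parameters('{"config": {"identifier": "symbol"}}')['config']['identifier'] == 'symbol'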
@@ -256,7 +256,7 @@ class NedrexImporter:
iter_edge_collection('drug_has_target', add_dpi)
models.ProteinDrugInteraction.objects.bulk_create(bulk)
new_datasets = [dataset, source_datasets.values()]
new_datasets = [dataset, *source_datasets.values()]
DatasetLoader.remove_old_pdi_data(new_datasets, licenced)
return len(bulk)
......
@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None:
# extend node data by cancer nodes, we create a normal node for each cancer node.
# on reading the data, we decide which one to keep based on the user selected cancer types
is_entrez = identifier == 'entrez'
is_entrez = (identifier == 'entrez' or identifier == 'ncbigene')
is_symbol = identifier == 'symbol'
is_uniprot = identifier == 'uniprot'
is_ensg = identifier == 'ensg'
is_ensg = (identifier == 'ensg' or identifier == 'ensembl')
if is_ensg:
ensembl_set = defaultdict(set)
......
@@ -10,6 +10,8 @@ from drugstone.management.includes.DataPopulator import DataPopulator
from .import_from_nedrex import NedrexImporter
from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
from ..includes.DatasetLoader import remove_old_pdi_data, remove_old_ppi_data, remove_old_pdis_data, \
remove_old_drdi_data
class DatabasePopulator:
@@ -59,7 +61,6 @@ class DatabasePopulator:
class Command(BaseCommand):
def add_arguments(self, parser):
# dataset directory
parser.add_argument('-dd', '--data_dir', type=str, help='Dataset directory path')
parser.add_argument('-dm', '--delete_model', type=str, help='Delete model(s)')
@@ -80,19 +81,28 @@ class Command(BaseCommand):
parser.add_argument('-pdi', '--protein_disorder', action='store_true',
help='Populate Protein-Disorder Associations')
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
parser.add_argument('-t', '--test', action='store_true', help='Running some function on startup')
def handle(self, *args, **kwargs):
populate(kwargs)
def populate(kwargs):
nedrex_api_url_unlicenced= "https://nedrex-api-open.zbh.uni-hamburg.de/"
nedrex_api_url_licenced = "https://nedrex-api-licenced.zbh.uni-hamburg.de/"
def populate(kwargs):
nedrex_api_url_open = "https://api.nedrex.net/open"
nedrex_api_url_licensed = "https://api.nedrex.net/licensed"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if 'test' in kwargs and kwargs['test']:
pass
# remove_old_ppi_data([PPIDataset.objects.filter(name='biogrid', licenced=False).last()], False)
# remove_old_ppi_data([PPIDataset.objects.filter(name='iid', licenced=False).last()], False)
# remove_old_ppi_data([PPIDataset.objects.filter(name='intact', licenced=False).last()], False)
# remove_old_pdis_data([PDisDataset.objects.filter(name='disgenet', licenced=False).last()], False)
# remove_old_pdis_data([PDisDataset.objects.filter(name='omim', licenced=True).last()], True)
# remove_old_drdi_data([DrDiDataset.objects.filter(name='ctd', licenced=False).last()], False)
# remove_old_drdi_data([DrDiDataset.objects.filter(name='drugcentral', licenced=False).last()], False)
if 'clear' in kwargs and kwargs['clear']:
db_populator.delete_all()
@@ -102,7 +112,7 @@ def populate(kwargs):
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url_licenced, nedrex_api_url_unlicenced, cache)
importer = NedrexImporter(nedrex_api_url_licensed, nedrex_api_url_open, cache)
populator = DataPopulator(cache)
total_n = 0
@@ -150,20 +160,22 @@ def populate(kwargs):
print(f'Populated {n} Expressions.')
if kwargs['protein_drug']:
print('Importing PDIs from unlicenced NeDRexDB...')
print('Importing PDIs from unlicensed NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_unlicenced, False),
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_open,
False),
update)
total_n += n
print(f'Imported {n} PDIs from unlicenced NeDRexDB')
print(f'Imported {n} PDIs from unlicensed NeDRexDB')
print('Importing PDIs from licenced NeDRexDB...')
print('Importing PDIs from licensed NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_licenced, True),
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_licensed,
True),
update)
total_n += n
nedrex_update = True
print(f'Imported {n} PDIs from licenced NeDRexDB')
print(f'Imported {n} PDIs from licensed NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
@@ -176,35 +188,36 @@ def populate(kwargs):
print(f'Populated {n} PDIs from DGIdb.')
if kwargs['protein_disorder']:
print('Importing PDis from unlicenced NeDRexDB...')
print('Importing PDis from unlicensed NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url_unlicenced, False),
nedrex_api_url_open, False),
update)
total_n += n
print(f'Imported {n} PDis from unlicenced NeDRexDB')
print(f'Imported {n} PDis from unlicensed NeDRexDB')
print('Importing PDis from licenced NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url_licenced, True),
nedrex_api_url_licensed, True),
update)
total_n += n
nedrex_update = True
print(f'Imported {n} PDis from licenced NeDRexDB')
if kwargs['drug_disorder']:
print('Importing DrDis from unlicenced NeDRexDB...')
print('Importing DrDis from unlicensed NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url_unlicenced, False),
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url_open,
False),
update)
total_n += n
print(f'Imported {n} DrDis from unlicenced NeDRexDB')
print(f'Imported {n} DrDis from unlicensed NeDRexDB')
print('Importing DrDis from licenced NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(
nedrex_api_url_licenced, True),
nedrex_api_url_licensed, True),
update)
total_n += n
nedrex_update = True
@@ -215,20 +228,20 @@ def populate(kwargs):
print(f'Populated {n} DrDi associations from DrugBank.')
if kwargs['protein_protein']:
print('Importing PPIs from unlicenced NeDRexDB...')
print('Importing PPIs from unlicensed NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url_unlicenced, False),
DatasetLoader.get_ppi_nedrex(nedrex_api_url_open, False),
update)
total_n += n
print(f'Imported {n} PPIs from unlicended NeDRexDB')
print(f'Imported {n} PPIs from unlicensed NeDRexDB')
print('Importing PPIs from licenced NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url_licenced,
DatasetLoader.get_ppi_nedrex(nedrex_api_url_licensed,
True),
update)
total_n += n
nedrex_update = True
print(f'Imported {n} PPIs from licended NeDRexDB')
print(f'Imported {n} PPIs from licensed NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
total_n += n
......
@@ -39,7 +39,9 @@ def get_ppi_biogrid():
def get_nedrex_version():
version = get_today_version()
try:
version = get_metadata()['version']
real_version = get_metadata()['version']
if real_version != "0.0.0":
version = real_version
except RetryError:
pass
return version
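The version guard above prefers the version reported by the NeDRex metadata endpoint, but keeps a date-based fallback when the endpoint returns the placeholder "0.0.0" or keeps failing. A runnable sketch of that logic, with get_today_version, get_metadata, and RetryError stubbed in as assumptions:

from datetime import date

class RetryError(Exception):
    """Stand-in for the retry exception caught in the real module."""

def get_today_version() -> str:
    # Hypothetical stub; the real helper presumably derives a date-based version.
    return date.today().strftime('%Y-%m-%d')

def get_metadata() -> dict:
    # Hypothetical stub for the NeDRex metadata call; may raise RetryError.
    return {'version': '2.0.1'}

def get_nedrex_version() -> str:
    version = get_today_version()
    try:
        real_version = get_metadata()['version']
        if real_version != '0.0.0':  # treat "0.0.0" as an unset placeholder
            version = real_version
    except RetryError:
        pass  # keep the date-based fallback when the API is unreachable
    return version

print(get_nedrex_version())  # '2.0.1' with the stub; the date string otherwise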
@@ -300,39 +302,59 @@ def is_licenced_drdi_source(source):
def remove_old_pdi_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except "+str(dataset))
try:
for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: "+str(d))
if d != dataset:
print("Deleting: "+str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
def remove_old_ppi_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except " + str(dataset))
try:
for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: " + str(d))
if d != dataset:
print("Deleting: " + str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
def remove_old_pdis_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except " + str(dataset))
try:
for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: " + str(d))
if d != dataset:
print("Deleting: " + str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
def remove_old_drdi_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except " + str(dataset))
try:
for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: " + str(d))
if d != dataset:
print("Deleting: " + str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
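The four cleanup helpers above differ only in the model they query; the shared pattern could be factored into a single generic function, sketched here as an observation rather than code from the repository:

def remove_old_data(model, new_datasets, licenced):
    """Delete every stored dataset with the same name/licence except the new one.

    `model` would be one of models.PDIDataset, models.PPIDataset,
    models.PDisDataset or models.DrDiDataset; this generic form is a sketch.
    """
    for dataset in new_datasets:
        print("Deleting all except " + str(dataset))
        try:
            for d in model.objects.filter(name=dataset.name, licenced=licenced):
                if d != dataset:
                    print("Deleting: " + str(d))
                    d.delete()
        except Exception as e:
            print("Error when trying to delete old datasets")
            print(e)
            continue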
@@ -6,6 +6,7 @@ from django.db import models
class PPIDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -19,6 +20,7 @@ class PPIDataset(models.Model):
class PDIDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -32,6 +34,7 @@ class PDIDataset(models.Model):
class PDisDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -45,6 +48,7 @@ class PDisDataset(models.Model):
class DrDiDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -58,6 +62,7 @@ class DrDiDataset(models.Model):
class EnsemblGene(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=15) # starts with ENSG...
protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='ensg')
@@ -65,7 +70,7 @@ class EnsemblGene(models.Model):
class Protein(models.Model):
# According to https://www.uniprot.org/help/accession_numbers UniProt accession codes
# are either 6 or 10 characters long
id = models.AutoField(primary_key=True)
uniprot_code = models.CharField(max_length=10)
gene = models.CharField(max_length=127, default='') # symbol
protein_name = models.CharField(max_length=255, default='')
@@ -98,6 +103,7 @@ class Protein(models.Model):
class ExpressionLevel(models.Model):
id = models.AutoField(primary_key=True)
tissue = models.ForeignKey('Tissue', on_delete=models.CASCADE)
protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
expression_level = models.FloatField()
@@ -110,6 +116,7 @@ class ExpressionLevel(models.Model):
class Tissue(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=True)
def __str__(self):
@@ -117,6 +124,7 @@ class Tissue(models.Model):
class Disorder(models.Model):
id = models.AutoField(primary_key=True)
mondo_id = models.CharField(max_length=7)
label = models.CharField(max_length=256, default='') # symbol
icd10 = models.CharField(max_length=512, default='')
@@ -145,6 +153,7 @@ class Disorder(models.Model):
class Drug(models.Model):
id = models.AutoField(primary_key=True)
drug_id = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=256, default='')
status = models.CharField(max_length=128, default='')
@@ -172,6 +181,7 @@ class Drug(models.Model):
class ProteinDisorderAssociation(models.Model):
id = models.BigAutoField(primary_key=True)
pdis_dataset = models.ForeignKey(
'PDisDataset', null=True, on_delete=models.CASCADE, related_name='pdis_dataset_relation')
protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
@@ -195,6 +205,7 @@ class ProteinDisorderAssociation(models.Model):
class DrugDisorderIndication(models.Model):
id = models.AutoField(primary_key=True)
drdi_dataset = models.ForeignKey(
'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation')
drug = models.ForeignKey('Drug', on_delete=models.CASCADE)
@@ -217,6 +228,7 @@ class DrugDisorderIndication(models.Model):
class ProteinProteinInteraction(models.Model):
id = models.BigAutoField(primary_key=True)
ppi_dataset = models.ForeignKey(
'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation')
from_protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='interacting_proteins_out')
@@ -255,6 +267,7 @@ class ProteinProteinInteraction(models.Model):
class ProteinDrugInteraction(models.Model):
id = models.BigAutoField(primary_key=True)
pdi_dataset = models.ForeignKey(
PDIDataset, null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
@@ -277,7 +290,7 @@ class ProteinDrugInteraction(models.Model):
class Task(models.Model):
token = models.CharField(max_length=32, unique=True)
token = models.CharField(max_length=32, unique=True, primary_key=True)
created_at = models.DateTimeField(auto_now_add=True)
target = models.CharField(max_length=32, choices=[('drug', 'Drug'), ('drug-target', 'Drug Target')])
......
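Note the Task change at the end of the models hunk: token is promoted from a merely unique field to the primary key, so Django drops the implicit auto id and pk lookups go through the token column. A hypothetical illustration (the token value is invented):

task = Task.objects.get(token='3f2a9c...')    # hypothetical token
same_task = Task.objects.get(pk='3f2a9c...')  # now the identical query
filename = f'{task.token}_drugs.csv'          # matches the result_view rename below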
@@ -3,6 +3,7 @@ from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = {
'update_db': {
'task': 'drugstone.tasks.task_update_db_from_nedrex',
'schedule': crontab(day_of_week=1, hour=5, minute=0),
'schedule': crontab(day_of_week=2, hour=3, minute=0),
# 'schedule': crontab(minute='*/1'),
},
}
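Celery's crontab follows cron's day numbering (0/7 = Sunday, 1 = Monday), so this hunk moves the weekly NeDRexDB update from Mondays at 05:00 to Tuesdays at 03:00. For illustration:

from celery.schedules import crontab

old_schedule = crontab(day_of_week=1, hour=5, minute=0)  # Mondays, 05:00
new_schedule = crontab(day_of_week=2, hour=3, minute=0)  # Tuesdays, 03:00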
@@ -19,12 +19,13 @@ from django.urls import path
from drugstone.views import map_nodes, tasks_view, result_view, \
graph_export, TissueView, TissueExpressionView, query_tissue_proteins, TaskView, \
adjacent_drugs, adjacent_disorders, fetch_edges, create_network, load_network, get_license, get_datasets, \
get_max_tissue_expression
get_max_tissue_expression, convert_compact_ids
# cache time is 6 hours
urlpatterns = [
path('get_datasets/', get_datasets),
path('map_nodes/', map_nodes),
path('convert_compact_node_list/', convert_compact_ids),
path('fetch_edges/', fetch_edges),
path('task/', TaskView.as_view()),
path('tasks/', tasks_view),
......
@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
elif identifier == 'uniprot':
protein_attribute = 'uniprot_ac'
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg':
elif identifier == 'ensg' or identifier == 'ensembl':
protein_attribute = 'ensg'
dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez':
elif identifier == 'entrez' or identifier == 'ncbigene':
protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids)
if not node_ids:
@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
nodes = list()
node_map = defaultdict(list)
if identifier == 'ensg':
if protein_attribute == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects):
for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids:
@@ -64,6 +64,83 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
return nodes, protein_attribute
def get_protein_ids(id_space, proteins):
if (id_space == 'uniprot'):
return [p['uniprot_ac'] for p in proteins]
if (id_space == 'ensg' or id_space == 'ensembl'):
return [p['ensg'] for p in proteins]
if (id_space == 'symbol'):
return [p['symbol'] for p in proteins]
if (id_space == 'entrez' or id_space == 'ncbigene'):
return [p['entrez'] for p in proteins]
return set()
def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> List[str]:
"""Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name
(identifier).
The identifier name represents any protein attribute, e.g. uniprot or symbol.
The identifier names vary from the Protein table names since they are the strings which are set by the user
in the frontend, for readability they were changes from the original backend attributes.
Args:
node_ids (list): List of protein or gene identifiers. Note: Do not mix identifiers.
identifier (str): Can be one of "symbol", "ensg", "uniprot"
Returns:
Tuple[List[dict], str]:
Returns list of serialized protein entries for all matched IDs
Returns name of backend attribute of Protein table
"""
# query protein table
if len(node_ids) == 0:
return list()
symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set()
id_map = {
'symbol:': symbol_set,
'uniprot:': uniprot_set,
'ensg:': ensg_set,
'ncbigene:': entrez_set,
'ensembl:': ensg_set,
'entrez:': entrez_set
}
clean_ids = set()
for node_id in node_ids:
added = False
for id_space in id_map.keys():
if node_id.startswith(id_space):
id_map[id_space].add(node_id[len(id_space):].upper())
added = True
break
if not added:
clean_ids.add(node_id)
for id_space, ids in id_map.items():
if len(ids) == 0:
continue
if id_space == 'symbol:':
q_list = map(lambda n: Q(gene__iexact=n), ids)
elif id_space == 'uniprot:':
q_list = map(lambda n: Q(uniprot_code__iexact=n), ids)
elif id_space == 'ensg:':
ensembls = EnsemblGene.objects.filter(reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), ids)))
if len(ensembls) == 0:
continue
dr_ids = map(lambda n: n.protein_id, ensembls)
q_list = map(lambda n: Q(id=n), dr_ids)
elif id_space == 'entrez:':
q_list = map(lambda n: Q(entrez=n), ids)
else:
continue
q_list = reduce(lambda a, b: a | b, q_list)
proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(q_list))
clean_ids = clean_ids.union(get_protein_ids(identifier, proteins))
return list(clean_ids)
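A hypothetical call showing the intent: prefixed compact-notation entries are resolved through the database into the requested ID space, while unprefixed entries pass straight through (assuming the usual TP53 records exist in the populated database):

node_ids = {'uniprot:P04637', 'ncbigene:7157', 'TP53'}
cleaned = clean_proteins_from_compact_notation(node_ids, 'symbol')
# P04637 and entrez 7157 both belong to TP53, so everything collapses into
# the symbol ID space and the result is simply ['TP53'].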
def aggregate_nodes(nodes: List[OrderedDict]):
node = defaultdict(set)
for n in nodes:
......
@@ -15,7 +15,7 @@ from django.db import IntegrityError
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework.views import APIView
from drugstone.util.query_db import query_proteins_by_identifier
from drugstone.util.query_db import query_proteins_by_identifier, clean_proteins_from_compact_notation
from drugstone.models import *
from drugstone.serializers import *
@@ -25,43 +25,31 @@ from drugstone.settings import DEFAULTS
def get_ppi_ds(source, licenced):
try:
ds = models.PPIDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_ppi_ds(source, False)
return None
return ds
def get_pdi_ds(source, licenced):
try:
ds = models.PDIDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_pdi_ds(source, False)
return None
return ds
def get_pdis_ds(source, licenced):
try:
ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_pdis_ds(source, False)
return None
return ds
def get_drdis_ds(source, licenced):
try:
ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_drdis_ds(source, False)
return None
return ds
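These four rewrites fix more than style: QuerySet.last() returns None instead of raising when nothing matches, so the old except branches were effectively dead and a missing licenced dataset silently yielded None. Checking ds is None makes the fallback to the unlicenced dataset actually fire. A hypothetical call:

# Prefer the licenced NeDRex PPI dataset; transparently fall back to the
# unlicenced one when no licenced row has been imported yet.
ds = get_ppi_ds('nedrex', True)
if ds is not None:
    print(ds.name, ds.licenced)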
class TaskView(APIView):
@@ -144,6 +132,14 @@ def fetch_edges(request) -> Response:
return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))
@api_view(['POST'])
def convert_compact_ids(request) -> Response:
nodes = request.data.get('nodes', '[]')
identifier = request.data.get('identifier', '')
cleaned = clean_proteins_from_compact_notation(nodes, identifier)
return Response(cleaned)
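A hypothetical client request against the new endpoint (registered above in urls.py as convert_compact_node_list/); the payload keys mirror what the view reads from request.data:

import requests  # already pinned in requirements.txt

resp = requests.post(
    'http://localhost:8000/convert_compact_node_list/',  # host/port assumed
    json={'nodes': ['uniprot:P04637', 'TP53'], 'identifier': 'symbol'},
)
print(resp.json())  # list of identifiers converted to the requested ID space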
@api_view(['POST'])
def map_nodes(request) -> Response:
"""Maps user given input nodes to Proteins in the django database.
@@ -175,7 +171,8 @@ def map_nodes(request) -> Response:
nodes_mapped, id_key = query_proteins_by_identifier(node_ids, identifier)
# change data structure to dict in order to be quicker when merging
nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
nodes_mapped_dict = {id.upper(): node for node in nodes_mapped for id in node[id_key]}
print(nodes_mapped_dict)
# merge fetched data with given data to avoid data loss
for node in nodes:
@@ -451,7 +448,7 @@ def result_view(request) -> Response:
else:
keys = []
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = f'attachment; filename="{task.id}_{view}.csv"'
response['Content-Disposition'] = f'attachment; filename="{task.token}_{view}.csv"'
dict_writer = csv.DictWriter(response, keys)
dict_writer.writeheader()
dict_writer.writerows(items)
@@ -466,6 +463,9 @@ def graph_export(request) -> Response:
Recieve whole graph data and write it to graphml file. Return the
file ready to download.
"""
remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow',
'group_id', 'drugstone_type', 'font', 'x', 'y']
remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id']
nodes = request.data.get('nodes', [])
edges = request.data.get('edges', [])
fmt = request.data.get('fmt', 'graphml')
@@ -473,6 +473,9 @@ def graph_export(request) -> Response:
node_map = dict()
for node in nodes:
# networkx does not support datatypes such as lists or dicts
for prop in remove_node_properties:
if prop in node:
del node[prop]
for key in list(node.keys()):
if isinstance(node[key], list) or isinstance(node[key], dict):
node[key] = json.dumps(node[key])
@@ -491,6 +494,9 @@ def graph_export(request) -> Response:
for e in edges:
# networkx does not support datatypes such as lists or dicts
for prop in remove_edge_properties:
if prop in e:
del e[prop]
for key in e:
if isinstance(e[key], list) or isinstance(e[key], dict):
e[key] = json.dumps(e[key])
@@ -509,23 +515,21 @@ def graph_export(request) -> Response:
data = nx.readwrite.json_graph.node_link_data(G)
del data['graph']
del data['multigraph']
remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow',
'group_id', 'drugstone_type', 'font']
remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id']
for node in data['nodes']:
for prop in remove_node_properties:
if prop in node:
del node[prop]
for edge in data['links']:
for prop in remove_edge_properties:
if prop in edge:
del edge[prop]
# for node in data['nodes']:
# for prop in remove_node_properties:
# if prop in node:
# del node[prop]
# for edge in data['links']:
# for prop in remove_edge_properties:
# if prop in edge:
# del edge[prop]
data["edges"] = data.pop("links")
data = json.dumps(data)
data = data.replace('"{', '{').replace('}"', '}').replace('"[', '[').replace(']"', ']').replace('\\"', '"')
response = HttpResponse(data, content_type='application/json')
elif fmt == 'csv':
data = pd.DataFrame(nx.to_numpy_array(G), columns=G.nodes(), index=G.nodes())
data = pd.DataFrame(nx.to_numpy_array(G), columns=G.nodes(), index=G.nodes(), dtype=int)
response = HttpResponse(data.to_csv(), content_type='text/csv')
response['content-disposition'] = f'attachment; filename="{int(time.time())}_network.{fmt}"'
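With these hunks, display-only attributes are stripped from the raw node and edge dicts before the networkx graph is built, instead of being scrubbed from the serialized JSON afterwards (the now-commented block in the JSON branch), so every export format benefits. The pattern in isolation, as a sketch:

# dict.pop() with a default is equivalent to the view's
# `if prop in node: del node[prop]` loop.
REMOVE_NODE_PROPERTIES = ['color', 'shape', 'border_width', 'group_name',
                          'border_width_selected', 'shadow', 'group_id',
                          'drugstone_type', 'font', 'x', 'y']

def strip_visual_properties(item: dict) -> dict:
    for prop in REMOVE_NODE_PROPERTIES:
        item.pop(prop, None)
    return item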
@@ -659,62 +663,25 @@ class TissueExpressionView(APIView):
Expression of host proteins in tissues.
"""
def post(self, request) -> Response:
tissue = Tissue.objects.get(id=request.data.get('tissue'))
if request.data.get('proteins'):
ids = json.loads(request.data.get('proteins'))
proteins = list(Protein.objects.filter(id__in=ids).all())
elif request.data.get('token'):
proteins = []
task = Task.objects.get(token=request.data['token'])
result = task_result(task)
network = result['network']
node_attributes = result.get('node_attributes')
if not node_attributes:
node_attributes = {}
node_types = node_attributes.get('node_types')
if not node_types:
node_types = {}
parameters = json.loads(task.parameters)
seeds = parameters['seeds']
nodes = network['nodes']
for node in nodes + seeds:
node_type = node_types.get(node)
details = None
if node_type == 'protein':
if details:
proteins.append(details)
else:
try:
prot = Protein.objects.get(uniprot_code=node)
if prot not in proteins:
proteins.append(Protein.objects.get(uniprot_code=node))
except Protein.DoesNotExist:
pass
pt_expressions = {}
for protein in proteins:
try:
expression_level = ExpressionLevel.objects.get(protein=protein, tissue=tissue)
pt_expressions[
ProteinSerializer().to_representation(protein)['drugstone_id']] = expression_level.expression_level
except ExpressionLevel.DoesNotExist:
pt_expressions[ProteinSerializer().to_representation(protein)['drugstone_id']] = None
return Response(pt_expressions)
def get(self, request) -> Response:
tissue = Tissue.objects.get(id=request.query_params.get('tissue'))
proteins = request.query_params.get('proteins')
token = request.query_params.get('token')
return self.get_tissue_expression(tissue, proteins, token)
def post(self, request) -> Response:
tissue = Tissue.objects.get(id=request.data.get('tissue'))
proteins = request.data.get('proteins')
token = request.data.get('token')
return self.get_tissue_expression(tissue, proteins, token)
if request.data.get('proteins'):
ids = json.loads(request.data.get('proteins'))
def get_tissue_expression(self, tissue, proteins, token):
if proteins is not None:
ids = json.loads(proteins)
proteins = list(Protein.objects.filter(id__in=ids).all())
elif request.data.get('token'):
elif token is not None:
proteins = []
task = Task.objects.get(token=request.data['token'])
task = Task.objects.get(token=token)
result = task_result(task)
network = result['network']
node_attributes = result.get('node_attributes')
......
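After this refactor, GET and POST are thin wrappers that collect tissue, proteins, and token from the request and delegate to the shared get_tissue_expression helper. Hypothetical calls (the URL path is assumed for illustration):

import requests  # already pinned in requirements.txt

base = 'http://localhost:8000/tissue_expression/'  # path assumed
requests.get(base, params={'tissue': 1, 'proteins': '[1, 2, 3]'})
requests.post(base, data={'tissue': 1, 'token': 'sometoken'})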
asgiref==3.2.7
certifi==2020.6.20
asgiref==3.5.2
celery==5.2.7
certifi==2022.12.7
chardet==3.0.4
click==7.1.2
click==8.1.3
cryptography==38.0.3
decorator==4.4.2
Django==3.0.5
Django==3.2.16
django-cors-headers==3.4.0
django-redis==4.11.0
django-rq-dashboard==0.3.3
djangorestframework==3.11.0
djangorestframework==3.11.2
djangorestframework-camel-case==1.1.2
entrypoints==0.3
flake8==3.7.9
flake8-quotes==3.0.0
idna==2.10
mccabe==0.6.1
networkx==2.2
numpy
pandas
networkx==2.8.8
numpy==1.23.5
pandas==1.3.5
pillow==9.3.0
psycopg2-binary==2.8.6
pycodestyle==2.5.0
pyflakes==2.1.1
python-dateutil==2.8.1
pytz==2019.3
redis==3.4.1
requests
rq==1.3.0
pytz==2021.3
redis==3.5.3
requests==2.28.1
rq==1.11.1
six==1.15.0
sqlparse==0.3.1
urllib3==1.25.10
sqlalchemy==1.3.23
celery==5.1.2
\ No newline at end of file
sqlparse==0.4.2
urllib3==1.26.12
@@ -4,7 +4,11 @@ python3 manage.py makemigrations drugstone
python3 manage.py migrate
python3 manage.py createfixtures
python3 manage.py cleanuptasks
#python3 manage.py populate_db --update -a
#python3 manage.py make_graphs
if [ -z "$DB_UPDATE_ON_START" ] || [ "$DB_UPDATE_ON_START" = "0" ]
then
echo "Update on startup disabled!"
else
python3 manage.py populate_db --update -a
python3 manage.py make_graphs
fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
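With this gate, a container starts without the NeDRex import and graph generation unless DB_UPDATE_ON_START is set to a non-zero value (both env files above now default it to 0). The same check rendered in Python, for illustration only:

import os

# Mirrors the shell test: unset, empty, or "0" disables the startup update.
if os.getenv('DB_UPDATE_ON_START', '0') in ('', '0'):
    print('Update on startup disabled!')
else:
    pass  # would run populate_db --update -a and make_graphs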
@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook):
if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt")
print(filename)
g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
if custom_edges:
......