diff --git a/Dockerfile b/Dockerfile index 6abc552ed463399196d426fb23663921f759c811..924a5c6a9c40239fd653269fbdc6594dee2a4abb 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM registry.blitzhub.io/conda_miniconda3 +FROM andimajore/miniconda3_kinetic WORKDIR /usr/src/drugstone/ @@ -24,8 +24,3 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf RUN pip install nedrex COPY . /usr/src/drugstone/ - - -#EXPOSE 8000 - -# ENTRYPOINT ["sh", "/entrypoint.sh"] diff --git a/docker-compose.yml b/docker-compose.yml index 4e8688633619205a4b2cba1d6684d1a05feab2d7..d98807febe8521b04b021502d07e3e2de9ce1bb1 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -83,15 +83,15 @@ services: - db networks: - drugstone_net - flower: - image: mher/flower - container_name: drugstone_flower - env_file: - - './docker-django.env.dev' - ports: - - 8888:8888 - networks: - - drugstone_net +# flower: +# image: mher/flower +# container_name: drugstone_flower +# env_file: +# - './docker-django.env.dev' +# ports: +# - 8888:8888 +# networks: +# - drugstone_net networks: drugstone_net: diff --git a/docker-django.env b/docker-django.env index 9bf4bf33e4554aafa7a51088eb57885b753ab769..defd3e70b9c0efba92870547a9b16ef0e84aecc5 100755 --- a/docker-django.env +++ b/docker-django.env @@ -12,4 +12,5 @@ REDIS_HOST=redis REDIS_PORT=6379 GT_THREADS=16 DJANGO_SETTINGS_MODULE=drugstone.settings -CELERY_BROKER_URL=redis://redis:6379/0 \ No newline at end of file +CELERY_BROKER_URL=redis://redis:6379/0 +DB_UPDATE_ON_START=0 \ No newline at end of file diff --git a/docker-django.env.dev b/docker-django.env.dev index 1f3d65dea9913d1be665c036f47ee209a4720f20..91d6c04b8f6b37a168562414a91cbc79cff2a676 100644 --- a/docker-django.env.dev +++ b/docker-django.env.dev @@ -14,4 +14,5 @@ DJANGO_SETTINGS_MODULE=drugstone.settings CELERY_BROKER_URL=redis://redis:6379/0 FLOWER_PORT=8888 FLOWER_BASIC_AUTH=drugstone:test -GT_THREADS=2 \ No newline at end of file +GT_THREADS=2 +DB_UPDATE_ON_START=0 \ No newline at end of file diff --git a/drugstone/backend_tasks.py b/drugstone/backend_tasks.py index a9aabcb202ba63eeb8c96769884d0788216de0d7..89dd52d40812252a265a5f485577da233ff9966f 100755 --- a/drugstone/backend_tasks.py +++ b/drugstone/backend_tasks.py @@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), decode_responses=False) rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r) - r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), port=os.getenv('REDIS_PORT', 6379), db=0, decode_responses=True) +identifier_map = { + 'ensembl': 'ensg', + 'ncbigene': 'entrez' +} + def run_task(token, algorithm, parameters): def set_progress(progress, status): @@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters): r.set(f'{token}_job_id', f'{job_id}') r.set(f'{token}_started_at', str(datetime.now().timestamp())) - task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result) + params = json.loads(parameters) + + params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier']) + + task_hook = TaskHook(params, './data/Networks/', set_progress, set_result) + + task_hook.parameters["config"].get("identifier", "symbol") try: if algorithm == 'dummy': @@ -100,7 +110,7 @@ def refresh_from_redis(task): def start_task(task): - job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30*60) + job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30 * 60) task.job_id = job.id diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py index bded48ddf6e1bcddbb4f7e3a5e89b3d97de78273..bc09491bb6f232a8f6bce5dbf71ef5866b9932a4 100644 --- a/drugstone/management/commands/import_from_nedrex.py +++ b/drugstone/management/commands/import_from_nedrex.py @@ -256,7 +256,7 @@ class NedrexImporter: iter_edge_collection('drug_has_target', add_dpi) models.ProteinDrugInteraction.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] + new_datasets = [dataset].extend(source_datasets.values()) DatasetLoader.remove_old_pdi_data(new_datasets, licenced) return len(bulk) diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py index 1a13741dc17a9d1bab5ef5bb6634580924e86baf..2ce582db53120edeaa4b126073d9fcf1ce26c816 100755 --- a/drugstone/management/commands/make_graphs.py +++ b/drugstone/management/commands/make_graphs.py @@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None: # extend node data by cancer nodes, we create a normal node for each cancer node. # on reading the data, we decide which one to keep based on the user selected cancer types - is_entrez = identifier == 'entrez' + is_entrez = (identifier == 'entrez' or identifier == 'ncbigene') is_symbol = identifier == 'symbol' is_uniprot = identifier == 'uniprot' - is_ensg = identifier == 'ensg' + is_ensg = (identifier == 'ensg' or identifier == 'ensembl') if is_ensg: ensembl_set = defaultdict(set) diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py index 94f64ce26a18c2afff81b2c5ef2e754a40f8aa25..4df7c6a14b3b63fbe61436033eeb4c7c97402b89 100755 --- a/drugstone/management/commands/populate_db.py +++ b/drugstone/management/commands/populate_db.py @@ -10,6 +10,8 @@ from drugstone.management.includes.DataPopulator import DataPopulator from .import_from_nedrex import NedrexImporter from drugstone.management.includes.NodeCache import NodeCache from drugstone.management.includes import DatasetLoader +from ..includes.DatasetLoader import remove_old_pdi_data, remove_old_ppi_data, remove_old_pdis_data, \ + remove_old_drdi_data class DatabasePopulator: @@ -59,7 +61,6 @@ class DatabasePopulator: class Command(BaseCommand): def add_arguments(self, parser): - # dataset directory parser.add_argument('-dd', '--data_dir', type=str, help='Dataset directory path') parser.add_argument('-dm', '--delete_model', type=str, help='Delete model(s)') @@ -80,19 +81,28 @@ class Command(BaseCommand): parser.add_argument('-pdi', '--protein_disorder', action='store_true', help='Populate Protein-Disorder Associations') parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications') + parser.add_argument('-t', '--test', action='store_true', help='Running some function on startup') def handle(self, *args, **kwargs): populate(kwargs) -def populate(kwargs): - nedrex_api_url_unlicenced= "https://nedrex-api-open.zbh.uni-hamburg.de/" - nedrex_api_url_licenced = "https://nedrex-api-licenced.zbh.uni-hamburg.de/" +def populate(kwargs): + nedrex_api_url_open = "https://api.nedrex.net/open" + nedrex_api_url_licensed = "https://api.nedrex.net/licensed" data_dir = kwargs['data_dir'] db_populator = DatabasePopulator(data_dir=data_dir) - + if 'test' in kwargs and kwargs['test']: + pass + # remove_old_ppi_data([PPIDataset.objects.filter(name='biogrid', licenced=False).last()], False) + # remove_old_ppi_data([PPIDataset.objects.filter(name='iid', licenced=False).last()], False) + # remove_old_ppi_data([PPIDataset.objects.filter(name='intact', licenced=False).last()], False) + # remove_old_pdis_data([PDisDataset.objects.filter(name='disgenet', licenced=False).last()], False) + # remove_old_pdis_data([PDisDataset.objects.filter(name='omim', licenced=True).last()], True) + # remove_old_drdi_data([DrDiDataset.objects.filter(name='ctd', licenced=False).last()], False) + # remove_old_drdi_data([DrDiDataset.objects.filter(name='drugcentral', licenced=False).last()], False) if 'clear' in kwargs and kwargs['clear']: db_populator.delete_all() @@ -102,7 +112,7 @@ def populate(kwargs): cache = NodeCache() update = True if kwargs['update'] else False - importer = NedrexImporter(nedrex_api_url_licenced, nedrex_api_url_unlicenced, cache) + importer = NedrexImporter(nedrex_api_url_licensed, nedrex_api_url_open, cache) populator = DataPopulator(cache) total_n = 0 @@ -121,7 +131,7 @@ def populate(kwargs): if kwargs['drugs']: print('Populating Drugs...') n = NedrexImporter.import_drugs(importer, update) - total_n +=n + total_n += n nedrex_update = True print(f'Populated {n} Drugs.') @@ -150,20 +160,22 @@ def populate(kwargs): print(f'Populated {n} Expressions.') if kwargs['protein_drug']: - print('Importing PDIs from unlicenced NeDRexDB...') + print('Importing PDIs from unlicensed NeDRexDB...') n = NedrexImporter.import_drug_target_interactions(importer, - DatasetLoader.get_drug_target_nedrex(nedrex_api_url_unlicenced, False), + DatasetLoader.get_drug_target_nedrex(nedrex_api_url_open, + False), update) total_n += n - print(f'Imported {n} PDIs from unlicenced NeDRexDB') + print(f'Imported {n} PDIs from unlicensed NeDRexDB') - print('Importing PDIs from licenced NeDRexDB...') + print('Importing PDIs from licensed NeDRexDB...') n = NedrexImporter.import_drug_target_interactions(importer, - DatasetLoader.get_drug_target_nedrex(nedrex_api_url_licenced, True), + DatasetLoader.get_drug_target_nedrex(nedrex_api_url_licensed, + True), update) total_n += n nedrex_update = True - print(f'Imported {n} PDIs from licenced NeDRexDB') + print(f'Imported {n} PDIs from licensed NeDRexDB') print('Populating PDIs from Chembl...') n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update) @@ -176,35 +188,36 @@ def populate(kwargs): print(f'Populated {n} PDIs from DGIdb.') if kwargs['protein_disorder']: - print('Importing PDis from unlicenced NeDRexDB...') + print('Importing PDis from unlicensed NeDRexDB...') n = NedrexImporter.import_protein_disorder_associations(importer, DatasetLoader.get_protein_disorder_nedrex( - nedrex_api_url_unlicenced, False), + nedrex_api_url_open, False), update) total_n += n - print(f'Imported {n} PDis from unlicenced NeDRexDB') + print(f'Imported {n} PDis from unlicensed NeDRexDB') print('Importing PDis from licenced NeDRexDB...') n = NedrexImporter.import_protein_disorder_associations(importer, DatasetLoader.get_protein_disorder_nedrex( - nedrex_api_url_licenced, True), + nedrex_api_url_licensed, True), update) total_n += n nedrex_update = True print(f'Imported {n} PDis from licenced NeDRexDB') if kwargs['drug_disorder']: - print('Importing DrDis from unlicenced NeDRexDB...') + print('Importing DrDis from unlicensed NeDRexDB...') n = NedrexImporter.import_drug_disorder_indications(importer, - DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url_unlicenced, False), + DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url_open, + False), update) total_n += n - print(f'Imported {n} DrDis from unlicenced NeDRexDB') + print(f'Imported {n} DrDis from unlicensed NeDRexDB') print('Importing DrDis from licenced NeDRexDB...') n = NedrexImporter.import_drug_disorder_indications(importer, DatasetLoader.get_drug_disorder_nedrex( - nedrex_api_url_licenced, True), + nedrex_api_url_licensed, True), update) total_n += n nedrex_update = True @@ -215,20 +228,20 @@ def populate(kwargs): print(f'Populated {n} DrDi associations from DrugBank.') if kwargs['protein_protein']: - print('Importing PPIs from unlicenced NeDRexDB...') + print('Importing PPIs from unlicensed NeDRexDB...') n = NedrexImporter.import_protein_protein_interactions(importer, - DatasetLoader.get_ppi_nedrex(nedrex_api_url_unlicenced, False), + DatasetLoader.get_ppi_nedrex(nedrex_api_url_open, False), update) total_n += n - print(f'Imported {n} PPIs from unlicended NeDRexDB') + print(f'Imported {n} PPIs from unlicensed NeDRexDB') print('Importing PPIs from licenced NeDRexDB...') n = NedrexImporter.import_protein_protein_interactions(importer, - DatasetLoader.get_ppi_nedrex(nedrex_api_url_licenced, + DatasetLoader.get_ppi_nedrex(nedrex_api_url_licensed, True), update) total_n += n nedrex_update = True - print(f'Imported {n} PPIs from licended NeDRexDB') + print(f'Imported {n} PPIs from licensed NeDRexDB') print('Populating PPIs from STRING...') n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update) total_n += n diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py index 804a3df0136edeaca6ce41444f0b1a12c48f4fb2..859fc1179fcd55d0c6e68675aad9a851d69ee31d 100644 --- a/drugstone/management/includes/DatasetLoader.py +++ b/drugstone/management/includes/DatasetLoader.py @@ -39,7 +39,9 @@ def get_ppi_biogrid(): def get_nedrex_version(): version = get_today_version() try: - version = get_metadata()['version'] + real_version = get_metadata()['version'] + if real_version != "0.0.0": + version = real_version except RetryError: pass return version @@ -50,7 +52,7 @@ def get_nedrex_source_version(source): # TODO remove once fixed in nedrex db if 'drug_central' in metadata: metadata['drugcentral'] = metadata['drug_central'] - + return metadata[source]['date'] @@ -300,39 +302,59 @@ def is_licenced_drdi_source(source): def remove_old_pdi_data(new_datasets, licenced): for dataset in new_datasets: + print("Deleting all except "+str(dataset)) try: for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced): + print("Testing: "+str(d)) if d != dataset: + print("Deleting: "+str(d)) d.delete() - except: + except Exception as e: + print("Error when trying to delete old datasets") + print(e) continue def remove_old_ppi_data(new_datasets, licenced): for dataset in new_datasets: + print("Deleting all except " + str(dataset)) try: for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced): + print("Testing: " + str(d)) if d != dataset: + print("Deleting: " + str(d)) d.delete() - except: + except Exception as e: + print("Error when trying to delete old datasets") + print(e) continue def remove_old_pdis_data(new_datasets, licenced): for dataset in new_datasets: + print("Deleting all except " + str(dataset)) try: for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced): + print("Testing: " + str(d)) if d != dataset: + print("Deleting: " + str(d)) d.delete() - except: + except Exception as e: + print("Error when trying to delete old datasets") + print(e) continue def remove_old_drdi_data(new_datasets, licenced): for dataset in new_datasets: + print("Deleting all except " + str(dataset)) try: for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced): + print("Testing: " + str(d)) if d != dataset: + print("Deleting: " + str(d)) d.delete() - except: + except Exception as e: + print("Error when trying to delete old datasets") + print(e) continue diff --git a/drugstone/models.py b/drugstone/models.py index 4a23d4519af0b9153cb2dedf6b6918c57bae578e..3c664322705ea5ab5c211f6ce6beabaebf9e4185 100755 --- a/drugstone/models.py +++ b/drugstone/models.py @@ -6,6 +6,7 @@ from django.db import models class PPIDataset(models.Model): + id = models.AutoField(primary_key=True) name = models.CharField(max_length=128, default='', unique=False) link = models.CharField(max_length=128, default='', unique=False) version = models.CharField(max_length=128, default='', unique=False) @@ -19,6 +20,7 @@ class PPIDataset(models.Model): class PDIDataset(models.Model): + id = models.AutoField(primary_key=True) name = models.CharField(max_length=128, default='', unique=False) link = models.CharField(max_length=128, default='', unique=False) version = models.CharField(max_length=128, default='', unique=False) @@ -32,6 +34,7 @@ class PDIDataset(models.Model): class PDisDataset(models.Model): + id = models.AutoField(primary_key=True) name = models.CharField(max_length=128, default='', unique=False) link = models.CharField(max_length=128, default='', unique=False) version = models.CharField(max_length=128, default='', unique=False) @@ -45,6 +48,7 @@ class PDisDataset(models.Model): class DrDiDataset(models.Model): + id = models.AutoField(primary_key=True) name = models.CharField(max_length=128, default='', unique=False) link = models.CharField(max_length=128, default='', unique=False) version = models.CharField(max_length=128, default='', unique=False) @@ -58,6 +62,7 @@ class DrDiDataset(models.Model): class EnsemblGene(models.Model): + id = models.AutoField(primary_key=True) name = models.CharField(max_length=15) # starts with ENSG... protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='ensg') @@ -65,7 +70,7 @@ class EnsemblGene(models.Model): class Protein(models.Model): # According to https://www.uniprot.org/help/accession_numbers UniProt accession codes # are either 6 or 10 characters long - + id = models.AutoField(primary_key=True) uniprot_code = models.CharField(max_length=10) gene = models.CharField(max_length=127, default='') # symbol protein_name = models.CharField(max_length=255, default='') @@ -98,6 +103,7 @@ class Protein(models.Model): class ExpressionLevel(models.Model): + id = models.AutoField(primary_key=True) tissue = models.ForeignKey('Tissue', on_delete=models.CASCADE) protein = models.ForeignKey('Protein', on_delete=models.CASCADE) expression_level = models.FloatField() @@ -110,6 +116,7 @@ class ExpressionLevel(models.Model): class Tissue(models.Model): + id = models.AutoField(primary_key=True) name = models.CharField(max_length=128, default='', unique=True) def __str__(self): @@ -117,6 +124,7 @@ class Tissue(models.Model): class Disorder(models.Model): + id = models.AutoField(primary_key=True) mondo_id = models.CharField(max_length=7) label = models.CharField(max_length=256, default='') # symbol icd10 = models.CharField(max_length=512, default='') @@ -145,6 +153,7 @@ class Disorder(models.Model): class Drug(models.Model): + id = models.AutoField(primary_key=True) drug_id = models.CharField(max_length=10, unique=True) name = models.CharField(max_length=256, default='') status = models.CharField(max_length=128, default='') @@ -172,6 +181,7 @@ class Drug(models.Model): class ProteinDisorderAssociation(models.Model): + id = models.BigAutoField(primary_key=True) pdis_dataset = models.ForeignKey( 'PDisDataset', null=True, on_delete=models.CASCADE, related_name='pdis_dataset_relation') protein = models.ForeignKey('Protein', on_delete=models.CASCADE) @@ -195,6 +205,7 @@ class ProteinDisorderAssociation(models.Model): class DrugDisorderIndication(models.Model): + id = models.AutoField(primary_key=True) drdi_dataset = models.ForeignKey( 'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation') drug = models.ForeignKey('Drug', on_delete=models.CASCADE) @@ -217,6 +228,7 @@ class DrugDisorderIndication(models.Model): class ProteinProteinInteraction(models.Model): + id = models.BigAutoField(primary_key=True) ppi_dataset = models.ForeignKey( 'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation') from_protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='interacting_proteins_out') @@ -255,6 +267,7 @@ class ProteinProteinInteraction(models.Model): class ProteinDrugInteraction(models.Model): + id = models.BigAutoField(primary_key=True) pdi_dataset = models.ForeignKey( PDIDataset, null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation') protein = models.ForeignKey('Protein', on_delete=models.CASCADE) @@ -277,7 +290,7 @@ class ProteinDrugInteraction(models.Model): class Task(models.Model): - token = models.CharField(max_length=32, unique=True) + token = models.CharField(max_length=32, unique=True, primary_key=True) created_at = models.DateTimeField(auto_now_add=True) target = models.CharField(max_length=32, choices=[('drug', 'Drug'), ('drug-target', 'Drug Target')]) diff --git a/drugstone/settings/celery_schedule.py b/drugstone/settings/celery_schedule.py index b066327c53c3c8f9beb7f969723311d538371221..6c068944fe4b6bf516a180d5677b32c2ff0d7250 100644 --- a/drugstone/settings/celery_schedule.py +++ b/drugstone/settings/celery_schedule.py @@ -3,6 +3,7 @@ from celery.schedules import crontab CELERY_BEAT_SCHEDULE = { 'update_db': { 'task': 'drugstone.tasks.task_update_db_from_nedrex', - 'schedule': crontab(day_of_week=1, hour=5, minute=0), + 'schedule': crontab(day_of_week=2, hour=3, minute=0), + # 'schedule': crontab(minute='*/1'), }, } diff --git a/drugstone/urls.py b/drugstone/urls.py index 7e0d16a49c23fdbbccef3c1f1d6f39fe833eb162..680f18bf922ca024b4b952e7ba883faef65f1c33 100755 --- a/drugstone/urls.py +++ b/drugstone/urls.py @@ -19,12 +19,13 @@ from django.urls import path from drugstone.views import map_nodes, tasks_view, result_view, \ graph_export, TissueView, TissueExpressionView, query_tissue_proteins, TaskView, \ adjacent_drugs, adjacent_disorders, fetch_edges, create_network, load_network, get_license, get_datasets, \ - get_max_tissue_expression + get_max_tissue_expression, convert_compact_ids # cache time is 6 hours urlpatterns = [ path('get_datasets/', get_datasets), path('map_nodes/', map_nodes), + path('convert_compact_node_list/', convert_compact_ids), path('fetch_edges/', fetch_edges), path('task/', TaskView.as_view()), path('tasks/', tasks_view), diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index 39171f50a81c7d9f8daaa4ef61658d34eadd4b5a..1a63b069a6c1bb835ad1a64140352f997a5e2cc6 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L elif identifier == 'uniprot': protein_attribute = 'uniprot_ac' q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) - elif identifier == 'ensg': + elif identifier == 'ensg' or identifier == 'ensembl': protein_attribute = 'ensg' dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter( reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids))))) q_list = map(lambda n: Q(id=n), dr_ids) - elif identifier == 'entrez': + elif identifier == 'entrez' or identifier == 'ncbigene': protein_attribute = 'entrez' q_list = map(lambda n: Q(entrez=n), node_ids) if not node_ids: @@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L nodes = list() node_map = defaultdict(list) - if identifier == 'ensg': + if protein_attribute == 'ensg': for node in ProteinSerializer(many=True).to_representation(node_objects): for ensembl_id in node.get(protein_attribute): if ensembl_id.upper() in node_ids: @@ -64,6 +64,83 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L return nodes, protein_attribute +def get_protein_ids(id_space, proteins): + if (id_space == 'uniprot'): + return [p['uniprot_ac'] for p in proteins] + if (id_space == 'ensg' or id_space == 'ensembl'): + return [p['ensg'] for p in proteins] + if (id_space == 'symbol'): + return [p['symbol'] for p in proteins] + if (id_space == 'entrez' or id_space == 'ncbigene'): + return [p['entrez'] for p in proteins] + return set() + + +def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> List[str]: + """Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name + (identifier). + The identifier name represents any protein attribute, e.g. uniprot or symbol. + The identifier names vary from the Protein table names since they are the strings which are set by the user + in the frontend, for readability they were changes from the original backend attributes. + + Args: + node_ids (list): List of protein or gene identifiers. Note: Do not mix identifiers. + identifier (str): Can be one of "symbol", "ensg", "uniprot" + + Returns: + Tuple[List[dict], str]: + Returns list of serialized protein entries for all matched IDs + Returns name of backend attribute of Protein table + """ + # query protein table + if len(node_ids) == 0: + return list() + + symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set() + + id_map = { + 'symbol:': symbol_set, + 'uniprot:': uniprot_set, + 'ensg:': ensg_set, + 'ncbigene:': entrez_set, + 'ensembl:': ensg_set, + 'entrez:': entrez_set + } + clean_ids = set() + for node_id in node_ids: + added = False + for id_space in id_map.keys(): + if node_id.startswith(id_space): + id_map[id_space].add(node_id[len(id_space):].upper()) + added = True + break + if not added: + clean_ids.add(node_id) + + for id_space, ids in id_map.items(): + if len(ids) == 0: + continue + if id_space == 'symbol:': + q_list = map(lambda n: Q(gene__iexact=n), ids) + elif id_space == 'uniprot:': + q_list = map(lambda n: Q(uniprot_code__iexact=n), ids) + elif id_space == 'ensg:': + ensembls = EnsemblGene.objects.filter(reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), ids))) + if len(ensembls) == 0: + continue + dr_ids = map(lambda n: n.protein_id, ensembls) + q_list = map(lambda n: Q(id=n), dr_ids) + elif id_space == 'entrez:': + q_list = map(lambda n: Q(entrez=n), ids) + else: + continue + q_list = reduce(lambda a, b: a | b, q_list) + proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(q_list)) + clean_ids = clean_ids.union(get_protein_ids(identifier, proteins)) + + return list(clean_ids) + + def aggregate_nodes(nodes: List[OrderedDict]): node = defaultdict(set) for n in nodes: diff --git a/drugstone/views.py b/drugstone/views.py index eb86c9de4af8be042065ba9e52f330465aee7ed8..5024babdb97b772f2e90d2b6e3bf2beff13c5a88 100755 --- a/drugstone/views.py +++ b/drugstone/views.py @@ -15,7 +15,7 @@ from django.db import IntegrityError from rest_framework.decorators import api_view from rest_framework.response import Response from rest_framework.views import APIView -from drugstone.util.query_db import query_proteins_by_identifier +from drugstone.util.query_db import query_proteins_by_identifier, clean_proteins_from_compact_notation from drugstone.models import * from drugstone.serializers import * @@ -25,43 +25,31 @@ from drugstone.settings import DEFAULTS def get_ppi_ds(source, licenced): - try: - ds = models.PPIDataset.objects.filter(name__iexact=source, licenced=licenced).last() - return ds - except: - if licenced: - return get_ppi_ds(source, False) - return None + ds = models.PPIDataset.objects.filter(name__iexact=source, licenced=licenced).last() + if ds is None and licenced: + return get_ppi_ds(source, False) + return ds def get_pdi_ds(source, licenced): - try: - ds = models.PDIDataset.objects.filter(name__iexact=source, licenced=licenced).last() - return ds - except: - if licenced: - return get_pdi_ds(source, False) - return None + ds = models.PDIDataset.objects.filter(name__iexact=source, licenced=licenced).last() + if ds is None and licenced: + return get_pdi_ds(source, False) + return ds def get_pdis_ds(source, licenced): - try: - ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last() - return ds - except: - if licenced: - return get_pdis_ds(source, False) - return None + ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last() + if ds is None and licenced: + return get_pdis_ds(source, False) + return ds def get_drdis_ds(source, licenced): - try: - ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last() - return ds - except: - if licenced: - return get_drdis_ds(source, False) - return None + ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last() + if ds is None and licenced: + return get_drdis_ds(source, False) + return ds class TaskView(APIView): @@ -144,6 +132,14 @@ def fetch_edges(request) -> Response: return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects)) +@api_view(['POST']) +def convert_compact_ids(request) -> Response: + nodes = request.data.get('nodes', '[]') + identifier = request.data.get('identifier', '') + cleaned = clean_proteins_from_compact_notation(nodes, identifier) + return Response(cleaned) + + @api_view(['POST']) def map_nodes(request) -> Response: """Maps user given input nodes to Proteins in the django database. @@ -175,7 +171,8 @@ def map_nodes(request) -> Response: nodes_mapped, id_key = query_proteins_by_identifier(node_ids, identifier) # change data structure to dict in order to be quicker when merging - nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped} + nodes_mapped_dict = {id.upper(): node for node in nodes_mapped for id in node[id_key]} + print(nodes_mapped_dict) # merge fetched data with given data to avoid data loss for node in nodes: @@ -451,7 +448,7 @@ def result_view(request) -> Response: else: keys = [] response = HttpResponse(content_type='text/csv') - response['Content-Disposition'] = f'attachment; filename="{task.id}_{view}.csv"' + response['Content-Disposition'] = f'attachment; filename="{task.token}_{view}.csv"' dict_writer = csv.DictWriter(response, keys) dict_writer.writeheader() dict_writer.writerows(items) @@ -466,6 +463,9 @@ def graph_export(request) -> Response: Recieve whole graph data and write it to graphml file. Return the file ready to download. """ + remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow', + 'group_id', 'drugstone_type', 'font', 'x', 'y'] + remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id'] nodes = request.data.get('nodes', []) edges = request.data.get('edges', []) fmt = request.data.get('fmt', 'graphml') @@ -473,6 +473,9 @@ def graph_export(request) -> Response: node_map = dict() for node in nodes: # networkx does not support datatypes such as lists or dicts + for prop in remove_node_properties: + if prop in node: + del node[prop] for key in list(node.keys()): if isinstance(node[key], list) or isinstance(node[key], dict): node[key] = json.dumps(node[key]) @@ -491,6 +494,9 @@ def graph_export(request) -> Response: for e in edges: # networkx does not support datatypes such as lists or dicts + for prop in remove_edge_properties: + if prop in e: + del e[prop] for key in e: if isinstance(e[key], list) or isinstance(e[key], dict): e[key] = json.dumps(e[key]) @@ -509,23 +515,21 @@ def graph_export(request) -> Response: data = nx.readwrite.json_graph.node_link_data(G) del data['graph'] del data['multigraph'] - remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow', - 'group_id', 'drugstone_type', 'font'] - remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id'] - for node in data['nodes']: - for prop in remove_node_properties: - if prop in node: - del node[prop] - for edge in data['links']: - for prop in remove_edge_properties: - if prop in edge: - del edge[prop] + + # for node in data['nodes']: + # for prop in remove_node_properties: + # if prop in node: + # del node[prop] + # for edge in data['links']: + # for prop in remove_edge_properties: + # if prop in edge: + # del edge[prop] data["edges"] = data.pop("links") data = json.dumps(data) data = data.replace('"{', '{').replace('}"', '}').replace('"[', '[').replace(']"', ']').replace('\\"', '"') response = HttpResponse(data, content_type='application/json') elif fmt == 'csv': - data = pd.DataFrame(nx.to_numpy_array(G), columns=G.nodes(), index=G.nodes()) + data = pd.DataFrame(nx.to_numpy_array(G), columns=G.nodes(), index=G.nodes(), dtype=int) response = HttpResponse(data.to_csv(), content_type='text/csv') response['content-disposition'] = f'attachment; filename="{int(time.time())}_network.{fmt}"' @@ -659,62 +663,25 @@ class TissueExpressionView(APIView): Expression of host proteins in tissues. """ - def post(self, request) -> Response: - tissue = Tissue.objects.get(id=request.data.get('tissue')) - - if request.data.get('proteins'): - ids = json.loads(request.data.get('proteins')) - proteins = list(Protein.objects.filter(id__in=ids).all()) - elif request.data.get('token'): - proteins = [] - task = Task.objects.get(token=request.data['token']) - result = task_result(task) - network = result['network'] - node_attributes = result.get('node_attributes') - if not node_attributes: - node_attributes = {} - node_types = node_attributes.get('node_types') - if not node_types: - node_types = {} - parameters = json.loads(task.parameters) - seeds = parameters['seeds'] - nodes = network['nodes'] - for node in nodes + seeds: - node_type = node_types.get(node) - details = None - if node_type == 'protein': - if details: - proteins.append(details) - else: - try: - prot = Protein.objects.get(uniprot_code=node) - if prot not in proteins: - proteins.append(Protein.objects.get(uniprot_code=node)) - except Protein.DoesNotExist: - pass - - pt_expressions = {} - - for protein in proteins: - try: - expression_level = ExpressionLevel.objects.get(protein=protein, tissue=tissue) - pt_expressions[ - ProteinSerializer().to_representation(protein)['drugstone_id']] = expression_level.expression_level - except ExpressionLevel.DoesNotExist: - pt_expressions[ProteinSerializer().to_representation(protein)['drugstone_id']] = None - - return Response(pt_expressions) - + def get(self, request) -> Response: + tissue = Tissue.objects.get(id=request.query_params.get('tissue')) + proteins = request.query_params.get('proteins') + token = request.query_params.get('token') + return self.get_tissue_expression(tissue, proteins, token) def post(self, request) -> Response: tissue = Tissue.objects.get(id=request.data.get('tissue')) + proteins = request.data.get('proteins') + token = request.data.get('token') + return self.get_tissue_expression(tissue, proteins, token) - if request.data.get('proteins'): - ids = json.loads(request.data.get('proteins')) + def get_tissue_expression(self, tissue, proteins, token): + if proteins is not None: + ids = json.loads(proteins) proteins = list(Protein.objects.filter(id__in=ids).all()) - elif request.data.get('token'): + elif token is not None: proteins = [] - task = Task.objects.get(token=request.data['token']) + task = Task.objects.get(token=token) result = task_result(task) network = result['network'] node_attributes = result.get('node_attributes') diff --git a/requirements.txt b/requirements.txt index 25c7c16152d39f4a5b04504e76122206731ad0c5..e2ca8b1400cb459de5d3df84f22a841954ed7b9c 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,32 +1,34 @@ -asgiref==3.2.7 -certifi==2020.6.20 +asgiref==3.5.2 +celery==5.2.7 +certifi==2022.12.7 chardet==3.0.4 -click==7.1.2 +click==8.1.3 +cryptography==38.0.3 decorator==4.4.2 -Django==3.0.5 +Django==3.2.16 django-cors-headers==3.4.0 django-redis==4.11.0 django-rq-dashboard==0.3.3 -djangorestframework==3.11.0 +djangorestframework==3.11.2 djangorestframework-camel-case==1.1.2 entrypoints==0.3 flake8==3.7.9 flake8-quotes==3.0.0 idna==2.10 mccabe==0.6.1 -networkx==2.2 -numpy -pandas +networkx==2.8.8 +numpy==1.23.5 +pandas==1.3.5 +pillow==9.3.0 psycopg2-binary==2.8.6 pycodestyle==2.5.0 pyflakes==2.1.1 python-dateutil==2.8.1 -pytz==2019.3 -redis==3.4.1 -requests -rq==1.3.0 +pytz==2021.3 +redis==3.5.3 +requests==2.28.1 +rq==1.11.1 six==1.15.0 -sqlparse==0.3.1 -urllib3==1.25.10 sqlalchemy==1.3.23 -celery==5.1.2 \ No newline at end of file +sqlparse==0.4.2 +urllib3==1.26.12 diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index db7a2d39ad5d4040b6dd34294c675ea20ae7b9bf..13501e0d43ebdc4537a2d7ccddfc9dc857a2ab8e 100755 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -4,7 +4,11 @@ python3 manage.py makemigrations drugstone python3 manage.py migrate python3 manage.py createfixtures python3 manage.py cleanuptasks -#python3 manage.py populate_db --update -a -#python3 manage.py make_graphs - +if [ -z "$DB_UPDATE_ON_START" ] || [ "$DB_UPDATE_ON_START" = "0" ] +then + echo "Update on startup disabled!" +else + python3 manage.py populate_db --update -a + python3 manage.py make_graphs +fi /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py index 210b82af4b1c674c1e8c9c2ef9d68c69cd24892a..5482f05ca8e6e8be0d411bdddb470ec8a4e1e150 100755 --- a/tasks/multi_steiner.py +++ b/tasks/multi_steiner.py @@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook): if ppi_dataset['licenced'] or pdi_dataset['licenced']: filename += "_licenced" filename = os.path.join(task_hook.data_directory, filename + ".gt") - print(filename) g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target) if custom_edges: