Commit 684829af authored by Michael Hartung

Merge branch 'development' of gitlab.rrz.uni-hamburg.de:cosy-bio/drugst.one/backend into development
parents 7870acc9 450da546
Related merge request: !3 (Development)
Showing changed files with 280 additions and 174 deletions
FROM registry.blitzhub.io/conda_miniconda3
FROM andimajore/miniconda3_kinetic
WORKDIR /usr/src/drugstone/
@@ -24,8 +24,3 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
RUN pip install nedrex
COPY . /usr/src/drugstone/
#EXPOSE 8000
# ENTRYPOINT ["sh", "/entrypoint.sh"]
@@ -83,15 +83,15 @@ services:
- db
networks:
- drugstone_net
flower:
image: mher/flower
container_name: drugstone_flower
env_file:
- './docker-django.env.dev'
ports:
- 8888:8888
networks:
- drugstone_net
# flower:
# image: mher/flower
# container_name: drugstone_flower
# env_file:
# - './docker-django.env.dev'
# ports:
# - 8888:8888
# networks:
# - drugstone_net
networks:
drugstone_net:
......
@@ -13,3 +13,4 @@ REDIS_PORT=6379
GT_THREADS=16
DJANGO_SETTINGS_MODULE=drugstone.settings
CELERY_BROKER_URL=redis://redis:6379/0
DB_UPDATE_ON_START=0
\ No newline at end of file
@@ -15,3 +15,4 @@ CELERY_BROKER_URL=redis://redis:6379/0
FLOWER_PORT=8888
FLOWER_BASIC_AUTH=drugstone:test
GT_THREADS=2
DB_UPDATE_ON_START=0
\ No newline at end of file
@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
decode_responses=False)
rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r)
r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
port=os.getenv('REDIS_PORT', 6379),
db=0,
decode_responses=True)
identifier_map = {
'ensembl': 'ensg',
'ncbigene': 'entrez'
}
def run_task(token, algorithm, parameters):
def set_progress(progress, status):
@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters):
r.set(f'{token}_job_id', f'{job_id}')
r.set(f'{token}_started_at', str(datetime.now().timestamp()))
task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result)
params = json.loads(parameters)
params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier'])
task_hook = TaskHook(params, './data/Networks/', set_progress, set_result)
task_hook.parameters["config"].get("identifier", "symbol")
try:
if algorithm == 'dummy':
......
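For reference, the effect of the new identifier_map lookup in run_task: frontend aliases are rewritten to the backend's canonical names before the TaskHook is constructed, and unknown values pass through unchanged. A minimal, self-contained sketch of that behavior (the map is copied from the hunk above; the wrapper function is illustrative only):

import json

identifier_map = {
    'ensembl': 'ensg',
    'ncbigene': 'entrez'
}

def normalize_parameters(parameters: str) -> dict:
    # Mirrors the run_task change: decode the JSON once, then canonicalize
    # the identifier, falling back to the original value if it has no alias.
    params = json.loads(parameters)
    ident = params['config']['identifier']
    params['config']['identifier'] = identifier_map.get(ident, ident)
    return params

assert normalize_parameters('{"config": {"identifier": "ncbigene"}}')['config']['identifier'] == 'entrez'
assert normalize_parameters('{"config": {"identifier": "symbol"}}')['config']['identifier'] == 'symbol'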
@@ -256,7 +256,7 @@ class NedrexImporter:
iter_edge_collection('drug_has_target', add_dpi)
models.ProteinDrugInteraction.objects.bulk_create(bulk)
new_datasets = [dataset, source_datasets.values()]
new_datasets = [dataset, *source_datasets.values()]
DatasetLoader.remove_old_pdi_data(new_datasets, licenced)
return len(bulk)
......
@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None:
# extend node data by cancer nodes, we create a normal node for each cancer node.
# on reading the data, we decide which one to keep based on the user selected cancer types
is_entrez = identifier == 'entrez'
is_entrez = (identifier == 'entrez' or identifier == 'ncbigene')
is_symbol = identifier == 'symbol'
is_uniprot = identifier == 'uniprot'
is_ensg = identifier == 'ensg'
is_ensg = (identifier == 'ensg' or identifier == 'ensembl')
if is_ensg:
ensembl_set = defaultdict(set)
......
@@ -10,6 +10,8 @@ from drugstone.management.includes.DataPopulator import DataPopulator
from .import_from_nedrex import NedrexImporter
from drugstone.management.includes.NodeCache import NodeCache
from drugstone.management.includes import DatasetLoader
from ..includes.DatasetLoader import remove_old_pdi_data, remove_old_ppi_data, remove_old_pdis_data, \
remove_old_drdi_data
class DatabasePopulator:
@@ -59,7 +61,6 @@ class DatabasePopulator:
class Command(BaseCommand):
def add_arguments(self, parser):
# dataset directory
parser.add_argument('-dd', '--data_dir', type=str, help='Dataset directory path')
parser.add_argument('-dm', '--delete_model', type=str, help='Delete model(s)')
@@ -80,19 +81,28 @@ class Command(BaseCommand):
parser.add_argument('-pdi', '--protein_disorder', action='store_true',
help='Populate Protein-Disorder Associations')
parser.add_argument('-ddi', '--drug_disorder', action='store_true', help='Populate Drug-Disorder Indications')
parser.add_argument('-t', '--test', action='store_true', help='Running some function on startup')
def handle(self, *args, **kwargs):
populate(kwargs)
def populate(kwargs):
nedrex_api_url_unlicenced= "https://nedrex-api-open.zbh.uni-hamburg.de/"
nedrex_api_url_licenced = "https://nedrex-api-licenced.zbh.uni-hamburg.de/"
def populate(kwargs):
nedrex_api_url_open = "https://api.nedrex.net/open"
nedrex_api_url_licensed = "https://api.nedrex.net/licensed"
data_dir = kwargs['data_dir']
db_populator = DatabasePopulator(data_dir=data_dir)
if 'test' in kwargs and kwargs['test']:
pass
# remove_old_ppi_data([PPIDataset.objects.filter(name='biogrid', licenced=False).last()], False)
# remove_old_ppi_data([PPIDataset.objects.filter(name='iid', licenced=False).last()], False)
# remove_old_ppi_data([PPIDataset.objects.filter(name='intact', licenced=False).last()], False)
# remove_old_pdis_data([PDisDataset.objects.filter(name='disgenet', licenced=False).last()], False)
# remove_old_pdis_data([PDisDataset.objects.filter(name='omim', licenced=True).last()], True)
# remove_old_drdi_data([DrDiDataset.objects.filter(name='ctd', licenced=False).last()], False)
# remove_old_drdi_data([DrDiDataset.objects.filter(name='drugcentral', licenced=False).last()], False)
if 'clear' in kwargs and kwargs['clear']:
db_populator.delete_all()
@@ -102,7 +112,7 @@ def populate(kwargs):
cache = NodeCache()
update = True if kwargs['update'] else False
importer = NedrexImporter(nedrex_api_url_licenced, nedrex_api_url_unlicenced, cache)
importer = NedrexImporter(nedrex_api_url_licensed, nedrex_api_url_open, cache)
populator = DataPopulator(cache)
total_n = 0
@@ -150,20 +160,22 @@ def populate(kwargs):
print(f'Populated {n} Expressions.')
if kwargs['protein_drug']:
print('Importing PDIs from unlicenced NeDRexDB...')
print('Importing PDIs from unlicensed NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_unlicenced, False),
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_open,
False),
update)
total_n += n
print(f'Imported {n} PDIs from unlicenced NeDRexDB')
print(f'Imported {n} PDIs from unlicensed NeDRexDB')
print('Importing PDIs from licenced NeDRexDB...')
print('Importing PDIs from licensed NeDRexDB...')
n = NedrexImporter.import_drug_target_interactions(importer,
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_licenced, True),
DatasetLoader.get_drug_target_nedrex(nedrex_api_url_licensed,
True),
update)
total_n += n
nedrex_update = True
print(f'Imported {n} PDIs from licenced NeDRexDB')
print(f'Imported {n} PDIs from licensed NeDRexDB')
print('Populating PDIs from Chembl...')
n = DataPopulator.populate_pdi_chembl(populator, DatasetLoader.get_drug_target_chembl(), update)
@@ -176,35 +188,36 @@ def populate(kwargs):
print(f'Populated {n} PDIs from DGIdb.')
if kwargs['protein_disorder']:
print('Importing PDis from unlicenced NeDRexDB...')
print('Importing PDis from unlicensed NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url_unlicenced, False),
nedrex_api_url_open, False),
update)
total_n += n
print(f'Imported {n} PDis from unlicenced NeDRexDB')
print(f'Imported {n} PDis from unlicensed NeDRexDB')
print('Importing PDis from licenced NeDRexDB...')
n = NedrexImporter.import_protein_disorder_associations(importer,
DatasetLoader.get_protein_disorder_nedrex(
nedrex_api_url_licenced, True),
nedrex_api_url_licensed, True),
update)
total_n += n
nedrex_update = True
print(f'Imported {n} PDis from licenced NeDRexDB')
if kwargs['drug_disorder']:
print('Importing DrDis from unlicenced NeDRexDB...')
print('Importing DrDis from unlicensed NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url_unlicenced, False),
DatasetLoader.get_drug_disorder_nedrex(nedrex_api_url_open,
False),
update)
total_n += n
print(f'Imported {n} DrDis from unlicenced NeDRexDB')
print(f'Imported {n} DrDis from unlicensed NeDRexDB')
print('Importing DrDis from licenced NeDRexDB...')
n = NedrexImporter.import_drug_disorder_indications(importer,
DatasetLoader.get_drug_disorder_nedrex(
nedrex_api_url_licenced, True),
nedrex_api_url_licensed, True),
update)
total_n += n
nedrex_update = True
@@ -215,20 +228,20 @@ def populate(kwargs):
print(f'Populated {n} DrDi associations from DrugBank.')
if kwargs['protein_protein']:
print('Importing PPIs from unlicenced NeDRexDB...')
print('Importing PPIs from unlicensed NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url_unlicenced, False),
DatasetLoader.get_ppi_nedrex(nedrex_api_url_open, False),
update)
total_n += n
print(f'Imported {n} PPIs from unlicended NeDRexDB')
print(f'Imported {n} PPIs from unlicensed NeDRexDB')
print('Importing PPIs from licenced NeDRexDB...')
n = NedrexImporter.import_protein_protein_interactions(importer,
DatasetLoader.get_ppi_nedrex(nedrex_api_url_licenced,
DatasetLoader.get_ppi_nedrex(nedrex_api_url_licensed,
True),
update)
total_n += n
nedrex_update = True
print(f'Imported {n} PPIs from licended NeDRexDB')
print(f'Imported {n} PPIs from licensed NeDRexDB')
print('Populating PPIs from STRING...')
n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update)
total_n += n
......
@@ -39,7 +39,9 @@ def get_ppi_biogrid():
def get_nedrex_version():
version = get_today_version()
try:
version = get_metadata()['version']
real_version = get_metadata()['version']
if real_version != "0.0.0":
version = real_version
except RetryError:
pass
return version
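The version guard above prefers the version reported by the NeDRex metadata endpoint, but keeps a date-based fallback when the endpoint returns the placeholder "0.0.0" or keeps failing. A runnable sketch of that logic, with get_today_version, get_metadata, and RetryError stubbed in as assumptions:

from datetime import date

class RetryError(Exception):
    """Stand-in for the retry exception caught in the real module."""

def get_today_version() -> str:
    # Hypothetical stub; the real helper presumably derives a date-based version.
    return date.today().strftime('%Y-%m-%d')

def get_metadata() -> dict:
    # Hypothetical stub for the NeDRex metadata call; may raise RetryError.
    return {'version': '2.0.1'}

def get_nedrex_version() -> str:
    version = get_today_version()
    try:
        real_version = get_metadata()['version']
        if real_version != '0.0.0':  # treat "0.0.0" as an unset placeholder
            version = real_version
    except RetryError:
        pass  # keep the date-based fallback when the API is unreachable
    return version

print(get_nedrex_version())  # '2.0.1' with the stub; the date string otherwise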
@@ -300,39 +302,59 @@ def is_licenced_drdi_source(source):
def remove_old_pdi_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except "+str(dataset))
try:
for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: "+str(d))
if d != dataset:
print("Deleting: "+str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
def remove_old_ppi_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except " + str(dataset))
try:
for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: " + str(d))
if d != dataset:
print("Deleting: " + str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
def remove_old_pdis_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except " + str(dataset))
try:
for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: " + str(d))
if d != dataset:
print("Deleting: " + str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
def remove_old_drdi_data(new_datasets, licenced):
for dataset in new_datasets:
print("Deleting all except " + str(dataset))
try:
for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced):
print("Testing: " + str(d))
if d != dataset:
print("Deleting: " + str(d))
d.delete()
except:
except Exception as e:
print("Error when trying to delete old datasets")
print(e)
continue
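The four cleanup helpers above differ only in the model they query; the shared pattern could be factored into a single generic function, sketched here as an observation rather than code from the repository:

def remove_old_data(model, new_datasets, licenced):
    """Delete every stored dataset with the same name/licence except the new one.

    `model` would be one of models.PDIDataset, models.PPIDataset,
    models.PDisDataset or models.DrDiDataset; this generic form is a sketch.
    """
    for dataset in new_datasets:
        print("Deleting all except " + str(dataset))
        try:
            for d in model.objects.filter(name=dataset.name, licenced=licenced):
                if d != dataset:
                    print("Deleting: " + str(d))
                    d.delete()
        except Exception as e:
            print("Error when trying to delete old datasets")
            print(e)
            continue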
@@ -6,6 +6,7 @@ from django.db import models
class PPIDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -19,6 +20,7 @@ class PPIDataset(models.Model):
class PDIDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -32,6 +34,7 @@ class PDIDataset(models.Model):
class PDisDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -45,6 +48,7 @@ class PDisDataset(models.Model):
class DrDiDataset(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=False)
link = models.CharField(max_length=128, default='', unique=False)
version = models.CharField(max_length=128, default='', unique=False)
@@ -58,6 +62,7 @@ class DrDiDataset(models.Model):
class EnsemblGene(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=15) # starts with ENSG...
protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='ensg')
@@ -65,7 +70,7 @@ class EnsemblGene(models.Model):
class Protein(models.Model):
# According to https://www.uniprot.org/help/accession_numbers UniProt accession codes
# are either 6 or 10 characters long
id = models.AutoField(primary_key=True)
uniprot_code = models.CharField(max_length=10)
gene = models.CharField(max_length=127, default='') # symbol
protein_name = models.CharField(max_length=255, default='')
@@ -98,6 +103,7 @@ class Protein(models.Model):
class ExpressionLevel(models.Model):
id = models.AutoField(primary_key=True)
tissue = models.ForeignKey('Tissue', on_delete=models.CASCADE)
protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
expression_level = models.FloatField()
@@ -110,6 +116,7 @@ class ExpressionLevel(models.Model):
class Tissue(models.Model):
id = models.AutoField(primary_key=True)
name = models.CharField(max_length=128, default='', unique=True)
def __str__(self):
@@ -117,6 +124,7 @@ class Tissue(models.Model):
class Disorder(models.Model):
id = models.AutoField(primary_key=True)
mondo_id = models.CharField(max_length=7)
label = models.CharField(max_length=256, default='') # symbol
icd10 = models.CharField(max_length=512, default='')
@@ -145,6 +153,7 @@ class Disorder(models.Model):
class Drug(models.Model):
id = models.AutoField(primary_key=True)
drug_id = models.CharField(max_length=10, unique=True)
name = models.CharField(max_length=256, default='')
status = models.CharField(max_length=128, default='')
@@ -172,6 +181,7 @@ class Drug(models.Model):
class ProteinDisorderAssociation(models.Model):
id = models.BigAutoField(primary_key=True)
pdis_dataset = models.ForeignKey(
'PDisDataset', null=True, on_delete=models.CASCADE, related_name='pdis_dataset_relation')
protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
@@ -195,6 +205,7 @@ class ProteinDisorderAssociation(models.Model):
class DrugDisorderIndication(models.Model):
id = models.AutoField(primary_key=True)
drdi_dataset = models.ForeignKey(
'DrDiDataset', null=True, on_delete=models.CASCADE, related_name='drdi_dataset_relation')
drug = models.ForeignKey('Drug', on_delete=models.CASCADE)
@@ -217,6 +228,7 @@ class DrugDisorderIndication(models.Model):
class ProteinProteinInteraction(models.Model):
id = models.BigAutoField(primary_key=True)
ppi_dataset = models.ForeignKey(
'PPIDataset', null=True, on_delete=models.CASCADE, related_name='ppi_dataset_relation')
from_protein = models.ForeignKey('Protein', on_delete=models.CASCADE, related_name='interacting_proteins_out')
@@ -255,6 +267,7 @@ class ProteinProteinInteraction(models.Model):
class ProteinDrugInteraction(models.Model):
id = models.BigAutoField(primary_key=True)
pdi_dataset = models.ForeignKey(
PDIDataset, null=True, on_delete=models.CASCADE, related_name='pdi_dataset_relation')
protein = models.ForeignKey('Protein', on_delete=models.CASCADE)
@@ -277,7 +290,7 @@ class ProteinDrugInteraction(models.Model):
class Task(models.Model):
token = models.CharField(max_length=32, unique=True)
token = models.CharField(max_length=32, unique=True, primary_key=True)
created_at = models.DateTimeField(auto_now_add=True)
target = models.CharField(max_length=32, choices=[('drug', 'Drug'), ('drug-target', 'Drug Target')])
......
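Note the Task change at the end of the models hunk: token is promoted from a merely unique field to the primary key, so Django drops the implicit auto id and pk lookups go through the token column. A hypothetical illustration (the token value is invented):

task = Task.objects.get(token='3f2a9c...')    # hypothetical token
same_task = Task.objects.get(pk='3f2a9c...')  # now the identical query
filename = f'{task.token}_drugs.csv'          # matches the result_view rename below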
@@ -3,6 +3,7 @@ from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = {
'update_db': {
'task': 'drugstone.tasks.task_update_db_from_nedrex',
'schedule': crontab(day_of_week=1, hour=5, minute=0),
'schedule': crontab(day_of_week=2, hour=3, minute=0),
# 'schedule': crontab(minute='*/1'),
},
}
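Celery's crontab follows cron's day numbering (0/7 = Sunday, 1 = Monday), so this hunk moves the weekly NeDRexDB update from Mondays at 05:00 to Tuesdays at 03:00. For illustration:

from celery.schedules import crontab

old_schedule = crontab(day_of_week=1, hour=5, minute=0)  # Mondays, 05:00
new_schedule = crontab(day_of_week=2, hour=3, minute=0)  # Tuesdays, 03:00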
@@ -19,12 +19,13 @@ from django.urls import path
from drugstone.views import map_nodes, tasks_view, result_view, \
graph_export, TissueView, TissueExpressionView, query_tissue_proteins, TaskView, \
adjacent_drugs, adjacent_disorders, fetch_edges, create_network, load_network, get_license, get_datasets, \
get_max_tissue_expression
get_max_tissue_expression, convert_compact_ids
# cache time is 6 hours
urlpatterns = [
path('get_datasets/', get_datasets),
path('map_nodes/', map_nodes),
path('convert_compact_node_list/', convert_compact_ids),
path('fetch_edges/', fetch_edges),
path('task/', TaskView.as_view()),
path('tasks/', tasks_view),
......
@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
elif identifier == 'uniprot':
protein_attribute = 'uniprot_ac'
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg':
elif identifier == 'ensg' or identifier == 'ensembl':
protein_attribute = 'ensg'
dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez':
elif identifier == 'entrez' or identifier == 'ncbigene':
protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids)
if not node_ids:
@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
nodes = list()
node_map = defaultdict(list)
if identifier == 'ensg':
if protein_attribute == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects):
for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids:
@@ -64,6 +64,83 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
return nodes, protein_attribute
def get_protein_ids(id_space, proteins):
if (id_space == 'uniprot'):
return [p['uniprot_ac'] for p in proteins]
if (id_space == 'ensg' or id_space == 'ensembl'):
return [p['ensg'] for p in proteins]
if (id_space == 'symbol'):
return [p['symbol'] for p in proteins]
if (id_space == 'entrez' or id_space == 'ncbigene'):
return [p['entrez'] for p in proteins]
return set()
def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> List[str]:
"""Queries the django database Protein table given a list of identifiers (node_ids) and a identifier name
(identifier).
The identifier name represents any protein attribute, e.g. uniprot or symbol.
The identifier names vary from the Protein table names since they are the strings which are set by the user
in the frontend, for readability they were changes from the original backend attributes.
Args:
node_ids (list): List of protein or gene identifiers. Note: Do not mix identifiers.
identifier (str): Can be one of "symbol", "ensg", "uniprot"
Returns:
Tuple[List[dict], str]:
Returns list of serialized protein entries for all matched IDs
Returns name of backend attribute of Protein table
"""
# query protein table
if len(node_ids) == 0:
return list()
symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set()
id_map = {
'symbol:': symbol_set,
'uniprot:': uniprot_set,
'ensg:': ensg_set,
'ncbigene:': entrez_set,
'ensembl:': ensg_set,
'entrez:': entrez_set
}
clean_ids = set()
for node_id in node_ids:
added = False
for id_space in id_map.keys():
if node_id.startswith(id_space):
id_map[id_space].add(node_id[len(id_space):].upper())
added = True
break
if not added:
clean_ids.add(node_id)
for id_space, ids in id_map.items():
if len(ids) == 0:
continue
if id_space == 'symbol:':
q_list = map(lambda n: Q(gene__iexact=n), ids)
elif id_space == 'uniprot:':
q_list = map(lambda n: Q(uniprot_code__iexact=n), ids)
elif id_space == 'ensg:':
ensembls = EnsemblGene.objects.filter(reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), ids)))
if len(ensembls) == 0:
continue
dr_ids = map(lambda n: n.protein_id, ensembls)
q_list = map(lambda n: Q(id=n), dr_ids)
elif id_space == 'entrez:':
q_list = map(lambda n: Q(entrez=n), ids)
else:
continue
q_list = reduce(lambda a, b: a | b, q_list)
proteins = ProteinSerializer(many=True).to_representation(Protein.objects.filter(q_list))
clean_ids = clean_ids.union(get_protein_ids(identifier, proteins))
return list(clean_ids)
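A hypothetical call showing the intent: prefixed compact-notation entries are resolved through the database into the requested ID space, while unprefixed entries pass straight through (assuming the usual TP53 records exist in the populated database):

node_ids = {'uniprot:P04637', 'ncbigene:7157', 'TP53'}
cleaned = clean_proteins_from_compact_notation(node_ids, 'symbol')
# P04637 and entrez 7157 both belong to TP53, so everything collapses into
# the symbol ID space and the result is simply ['TP53'].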
def aggregate_nodes(nodes: List[OrderedDict]):
node = defaultdict(set)
for n in nodes:
......
@@ -15,7 +15,7 @@ from django.db import IntegrityError
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework.views import APIView
from drugstone.util.query_db import query_proteins_by_identifier
from drugstone.util.query_db import query_proteins_by_identifier, clean_proteins_from_compact_notation
from drugstone.models import *
from drugstone.serializers import *
@@ -25,43 +25,31 @@ from drugstone.settings import DEFAULTS
def get_ppi_ds(source, licenced):
try:
ds = models.PPIDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_ppi_ds(source, False)
return None
return ds
def get_pdi_ds(source, licenced):
try:
ds = models.PDIDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_pdi_ds(source, False)
return None
return ds
def get_pdis_ds(source, licenced):
try:
ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_pdis_ds(source, False)
return None
return ds
def get_drdis_ds(source, licenced):
try:
ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last()
return ds
except:
if licenced:
if ds is None and licenced:
return get_drdis_ds(source, False)
return None
return ds
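These four rewrites fix more than style: QuerySet.last() returns None instead of raising when nothing matches, so the old except branches were effectively dead and a missing licenced dataset silently yielded None. Checking ds is None makes the fallback to the unlicenced dataset actually fire. A hypothetical call:

# Prefer the licenced NeDRex PPI dataset; transparently fall back to the
# unlicenced one when no licenced row has been imported yet.
ds = get_ppi_ds('nedrex', True)
if ds is not None:
    print(ds.name, ds.licenced)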
class TaskView(APIView):
@@ -144,6 +132,14 @@ def fetch_edges(request) -> Response:
return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))
@api_view(['POST'])
def convert_compact_ids(request) -> Response:
nodes = request.data.get('nodes', '[]')
identifier = request.data.get('identifier', '')
cleaned = clean_proteins_from_compact_notation(nodes, identifier)
return Response(cleaned)
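A hypothetical client request against the new endpoint (registered above in urls.py as convert_compact_node_list/); the payload keys mirror what the view reads from request.data:

import requests  # already pinned in requirements.txt

resp = requests.post(
    'http://localhost:8000/convert_compact_node_list/',  # host/port assumed
    json={'nodes': ['uniprot:P04637', 'TP53'], 'identifier': 'symbol'},
)
print(resp.json())  # list of identifiers converted to the requested ID space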
@api_view(['POST'])
def map_nodes(request) -> Response:
"""Maps user given input nodes to Proteins in the django database.
@@ -175,7 +171,8 @@ def map_nodes(request) -> Response:
nodes_mapped, id_key = query_proteins_by_identifier(node_ids, identifier)
# change data structure to dict in order to be quicker when merging
nodes_mapped_dict = {node[id_key][0]: node for node in nodes_mapped}
nodes_mapped_dict = {id.upper(): node for node in nodes_mapped for id in node[id_key]}
print(nodes_mapped_dict)
# merge fetched data with given data to avoid data loss
for node in nodes:
@@ -451,7 +448,7 @@ def result_view(request) -> Response:
else:
keys = []
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = f'attachment; filename="{task.id}_{view}.csv"'
response['Content-Disposition'] = f'attachment; filename="{task.token}_{view}.csv"'
dict_writer = csv.DictWriter(response, keys)
dict_writer.writeheader()
dict_writer.writerows(items)
@@ -466,6 +463,9 @@ def graph_export(request) -> Response:
Recieve whole graph data and write it to graphml file. Return the
file ready to download.
"""
remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow',
'group_id', 'drugstone_type', 'font', 'x', 'y']
remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id']
nodes = request.data.get('nodes', [])
edges = request.data.get('edges', [])
fmt = request.data.get('fmt', 'graphml')
@@ -473,6 +473,9 @@ def graph_export(request) -> Response:
node_map = dict()
for node in nodes:
# networkx does not support datatypes such as lists or dicts
for prop in remove_node_properties:
if prop in node:
del node[prop]
for key in list(node.keys()):
if isinstance(node[key], list) or isinstance(node[key], dict):
node[key] = json.dumps(node[key])
@@ -491,6 +494,9 @@ def graph_export(request) -> Response:
for e in edges:
# networkx does not support datatypes such as lists or dicts
for prop in remove_edge_properties:
if prop in e:
del e[prop]
for key in e:
if isinstance(e[key], list) or isinstance(e[key], dict):
e[key] = json.dumps(e[key])
@@ -509,23 +515,21 @@ def graph_export(request) -> Response:
data = nx.readwrite.json_graph.node_link_data(G)
del data['graph']
del data['multigraph']
remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow',
'group_id', 'drugstone_type', 'font']
remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id']
for node in data['nodes']:
for prop in remove_node_properties:
if prop in node:
del node[prop]
for edge in data['links']:
for prop in remove_edge_properties:
if prop in edge:
del edge[prop]
# for node in data['nodes']:
# for prop in remove_node_properties:
# if prop in node:
# del node[prop]
# for edge in data['links']:
# for prop in remove_edge_properties:
# if prop in edge:
# del edge[prop]
data["edges"] = data.pop("links")
data = json.dumps(data)
data = data.replace('"{', '{').replace('}"', '}').replace('"[', '[').replace(']"', ']').replace('\\"', '"')
response = HttpResponse(data, content_type='application/json')
elif fmt == 'csv':
data = pd.DataFrame(nx.to_numpy_array(G), columns=G.nodes(), index=G.nodes())
data = pd.DataFrame(nx.to_numpy_array(G), columns=G.nodes(), index=G.nodes(), dtype=int)
response = HttpResponse(data.to_csv(), content_type='text/csv')
response['content-disposition'] = f'attachment; filename="{int(time.time())}_network.{fmt}"'
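With these hunks, display-only attributes are stripped from the raw node and edge dicts before the networkx graph is built, instead of being scrubbed from the serialized JSON afterwards (the now-commented block in the JSON branch), so every export format benefits. The pattern in isolation, as a sketch:

# dict.pop() with a default is equivalent to the view's
# `if prop in node: del node[prop]` loop.
REMOVE_NODE_PROPERTIES = ['color', 'shape', 'border_width', 'group_name',
                          'border_width_selected', 'shadow', 'group_id',
                          'drugstone_type', 'font', 'x', 'y']

def strip_visual_properties(item: dict) -> dict:
    for prop in REMOVE_NODE_PROPERTIES:
        item.pop(prop, None)
    return item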
@@ -659,62 +663,25 @@ class TissueExpressionView(APIView):
Expression of host proteins in tissues.
"""
def post(self, request) -> Response:
tissue = Tissue.objects.get(id=request.data.get('tissue'))
if request.data.get('proteins'):
ids = json.loads(request.data.get('proteins'))
proteins = list(Protein.objects.filter(id__in=ids).all())
elif request.data.get('token'):
proteins = []
task = Task.objects.get(token=request.data['token'])
result = task_result(task)
network = result['network']
node_attributes = result.get('node_attributes')
if not node_attributes:
node_attributes = {}
node_types = node_attributes.get('node_types')
if not node_types:
node_types = {}
parameters = json.loads(task.parameters)
seeds = parameters['seeds']
nodes = network['nodes']
for node in nodes + seeds:
node_type = node_types.get(node)
details = None
if node_type == 'protein':
if details:
proteins.append(details)
else:
try:
prot = Protein.objects.get(uniprot_code=node)
if prot not in proteins:
proteins.append(Protein.objects.get(uniprot_code=node))
except Protein.DoesNotExist:
pass
pt_expressions = {}
for protein in proteins:
try:
expression_level = ExpressionLevel.objects.get(protein=protein, tissue=tissue)
pt_expressions[
ProteinSerializer().to_representation(protein)['drugstone_id']] = expression_level.expression_level
except ExpressionLevel.DoesNotExist:
pt_expressions[ProteinSerializer().to_representation(protein)['drugstone_id']] = None
return Response(pt_expressions)
def get(self, request) -> Response:
tissue = Tissue.objects.get(id=request.query_params.get('tissue'))
proteins = request.query_params.get('proteins')
token = request.query_params.get('token')
return self.get_tissue_expression(tissue, proteins, token)
def post(self, request) -> Response:
tissue = Tissue.objects.get(id=request.data.get('tissue'))
proteins = request.data.get('proteins')
token = request.data.get('token')
return self.get_tissue_expression(tissue, proteins, token)
if request.data.get('proteins'):
ids = json.loads(request.data.get('proteins'))
def get_tissue_expression(self, tissue, proteins, token):
if proteins is not None:
ids = json.loads(proteins)
proteins = list(Protein.objects.filter(id__in=ids).all())
elif request.data.get('token'):
elif token is not None:
proteins = []
task = Task.objects.get(token=request.data['token'])
task = Task.objects.get(token=token)
result = task_result(task)
network = result['network']
node_attributes = result.get('node_attributes')
......
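After this refactor, GET and POST are thin wrappers that collect tissue, proteins, and token from the request and delegate to the shared get_tissue_expression helper. Hypothetical calls (the URL path is assumed for illustration):

import requests  # already pinned in requirements.txt

base = 'http://localhost:8000/tissue_expression/'  # path assumed
requests.get(base, params={'tissue': 1, 'proteins': '[1, 2, 3]'})
requests.post(base, data={'tissue': 1, 'token': 'sometoken'})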
asgiref==3.2.7
certifi==2020.6.20
asgiref==3.5.2
celery==5.2.7
certifi==2022.12.7
chardet==3.0.4
click==7.1.2
click==8.1.3
cryptography==38.0.3
decorator==4.4.2
Django==3.0.5
Django==3.2.16
django-cors-headers==3.4.0
django-redis==4.11.0
django-rq-dashboard==0.3.3
djangorestframework==3.11.0
djangorestframework==3.11.2
djangorestframework-camel-case==1.1.2
entrypoints==0.3
flake8==3.7.9
flake8-quotes==3.0.0
idna==2.10
mccabe==0.6.1
networkx==2.2
numpy
pandas
networkx==2.8.8
numpy==1.23.5
pandas==1.3.5
pillow==9.3.0
psycopg2-binary==2.8.6
pycodestyle==2.5.0
pyflakes==2.1.1
python-dateutil==2.8.1
pytz==2019.3
redis==3.4.1
requests
rq==1.3.0
pytz==2021.3
redis==3.5.3
requests==2.28.1
rq==1.11.1
six==1.15.0
sqlparse==0.3.1
urllib3==1.25.10
sqlalchemy==1.3.23
celery==5.1.2
\ No newline at end of file
sqlparse==0.4.2
urllib3==1.26.12
@@ -4,7 +4,11 @@ python3 manage.py makemigrations drugstone
python3 manage.py migrate
python3 manage.py createfixtures
python3 manage.py cleanuptasks
#python3 manage.py populate_db --update -a
#python3 manage.py make_graphs
if [ -z "$DB_UPDATE_ON_START" ] || [ "$DB_UPDATE_ON_START" = "0" ]
then
echo "Update on startup disabled!"
else
python3 manage.py populate_db --update -a
python3 manage.py make_graphs
fi
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
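With this gate, a container starts without the NeDRex import and graph generation unless DB_UPDATE_ON_START is set to a non-zero value (both env files above now default it to 0). The same check rendered in Python, for illustration only:

import os

# Mirrors the shell test: unset, empty, or "0" disables the startup update.
if os.getenv('DB_UPDATE_ON_START', '0') in ('', '0'):
    print('Update on startup disabled!')
else:
    pass  # would run populate_db --update -a and make_graphs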
@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook):
if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt")
print(filename)
g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
if custom_edges:
......