Skip to content
Snippets Groups Projects
Commit f696e9b8 authored by AndiMajore's avatar AndiMajore
Browse files

added id_space options

Former-commit-id: 25b74e80
parent 276f374f
No related branches found
No related tags found
No related merge requests found
...@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), ...@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
decode_responses=False) decode_responses=False)
rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r) rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r)
r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
port=os.getenv('REDIS_PORT', 6379), port=os.getenv('REDIS_PORT', 6379),
db=0, db=0,
decode_responses=True) decode_responses=True)
# Aliases for externally-facing ID-space names, mapped onto the
# identifier names used internally by the task pipeline.
identifier_map = dict(
    ensembl='ensg',
    ncbigene='entrez',
)
def run_task(token, algorithm, parameters): def run_task(token, algorithm, parameters):
def set_progress(progress, status): def set_progress(progress, status):
...@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters): ...@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters):
r.set(f'{token}_job_id', f'{job_id}') r.set(f'{token}_job_id', f'{job_id}')
r.set(f'{token}_started_at', str(datetime.now().timestamp())) r.set(f'{token}_started_at', str(datetime.now().timestamp()))
task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result) params = json.loads(parameters)
params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier'])
task_hook = TaskHook(params, './data/Networks/', set_progress, set_result)
task_hook.parameters["config"].get("identifier", "symbol")
try: try:
if algorithm == 'dummy': if algorithm == 'dummy':
...@@ -100,7 +110,7 @@ def refresh_from_redis(task): ...@@ -100,7 +110,7 @@ def refresh_from_redis(task):
def start_task(task): def start_task(task):
job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30*60) job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30 * 60)
task.job_id = job.id task.job_id = job.id
......
...@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None: ...@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None:
# extend node data by cancer nodes, we create a normal node for each cancer node. # extend node data by cancer nodes, we create a normal node for each cancer node.
# on reading the data, we decide which one to keep based on the user selected cancer types # on reading the data, we decide which one to keep based on the user selected cancer types
is_entrez = identifier == 'entrez' is_entrez = (identifier == 'entrez' or identifier == 'ncbigene')
is_symbol = identifier == 'symbol' is_symbol = identifier == 'symbol'
is_uniprot = identifier == 'uniprot' is_uniprot = identifier == 'uniprot'
is_ensg = identifier == 'ensg' is_ensg = (identifier == 'ensg' or identifier == 'ensembl')
if is_ensg: if is_ensg:
ensembl_set = defaultdict(set) ensembl_set = defaultdict(set)
......
...@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
elif identifier == 'uniprot': elif identifier == 'uniprot':
protein_attribute = 'uniprot_ac' protein_attribute = 'uniprot_ac'
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg': elif identifier == 'ensg' or identifier == 'ensembl':
protein_attribute = 'ensg' protein_attribute = 'ensg'
dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter( dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids))))) reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), dr_ids) q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez': elif identifier == 'entrez' or identifier == 'ncbigene':
protein_attribute = 'entrez' protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids) q_list = map(lambda n: Q(entrez=n), node_ids)
if not node_ids: if not node_ids:
...@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
nodes = list() nodes = list()
node_map = defaultdict(list) node_map = defaultdict(list)
if identifier == 'ensg': if protein_attribute == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects): for node in ProteinSerializer(many=True).to_representation(node_objects):
for ensembl_id in node.get(protein_attribute): for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids: if ensembl_id.upper() in node_ids:
...@@ -67,11 +67,11 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -67,11 +67,11 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
def get_protein_ids(id_space, proteins):
    """Return the identifiers of ``proteins`` in the requested ID space.

    Parameters:
        id_space: one of 'uniprot', 'ensg'/'ensembl', 'symbol',
            'entrez'/'ncbigene' (alias pairs map to the same attribute).
        proteins: iterable of serialized protein dicts that carry the
            corresponding attribute key.

    Returns:
        A list of the mapped identifier values, or an empty ``set()`` for
        an unknown id_space (kept as ``set()`` — not ``[]`` — for backward
        compatibility with existing callers; NOTE(review): the mixed
        list/set return type looks accidental — confirm before unifying).
    """
    # Dispatch table replaces the repeated if-chain; alias id spaces
    # ('ensembl', 'ncbigene') share the key of their canonical form.
    key_by_space = {
        'uniprot': 'uniprot_ac',
        'ensg': 'ensg',
        'ensembl': 'ensg',
        'symbol': 'symbol',
        'entrez': 'entrez',
        'ncbigene': 'entrez',
    }
    key = key_by_space.get(id_space)
    if key is None:
        # Preserve the original fallback value for unknown id spaces.
        return set()
    return [p[key] for p in proteins]
...@@ -95,11 +95,16 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> ...@@ -95,11 +95,16 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) ->
# query protein table # query protein table
if len(node_ids) == 0: if len(node_ids) == 0:
return list() return list()
symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set()
id_map = { id_map = {
'symbol:': set(), 'symbol:': symbol_set,
'uniprot:': set(), 'uniprot:': uniprot_set,
'ensg:': set(), 'ensg:': ensg_set,
'entrez:': set() 'ncbigene:': entrez_set,
'ensembl:': ensg_set,
'entrez:': entrez_set
} }
clean_ids = set() clean_ids = set()
for node_id in node_ids: for node_id in node_ids:
......
...@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook): ...@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook):
if ppi_dataset['licenced'] or pdi_dataset['licenced']: if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced" filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt") filename = os.path.join(task_hook.data_directory, filename + ".gt")
print(filename)
g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target) g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
if custom_edges: if custom_edges:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment