diff --git a/drugstone/backend_tasks.py b/drugstone/backend_tasks.py index a9aabcb202ba63eeb8c96769884d0788216de0d7..89dd52d40812252a265a5f485577da233ff9966f 100755 --- a/drugstone/backend_tasks.py +++ b/drugstone/backend_tasks.py @@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), decode_responses=False) rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r) - r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), port=os.getenv('REDIS_PORT', 6379), db=0, decode_responses=True) +identifier_map = { + 'ensembl': 'ensg', + 'ncbigene': 'entrez' +} + def run_task(token, algorithm, parameters): def set_progress(progress, status): @@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters): r.set(f'{token}_job_id', f'{job_id}') r.set(f'{token}_started_at', str(datetime.now().timestamp())) - task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result) + params = json.loads(parameters) + + params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier']) + + task_hook = TaskHook(params, './data/Networks/', set_progress, set_result) + + task_hook.parameters["config"].get("identifier", "symbol") try: if algorithm == 'dummy': @@ -100,7 +110,7 @@ def refresh_from_redis(task): def start_task(task): - job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30*60) + job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30 * 60) task.job_id = job.id diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py index 1a13741dc17a9d1bab5ef5bb6634580924e86baf..2ce582db53120edeaa4b126073d9fcf1ce26c816 100755 --- a/drugstone/management/commands/make_graphs.py +++ b/drugstone/management/commands/make_graphs.py @@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None: # extend node data by cancer nodes, we create a normal node for each cancer node. # on reading the data, we decide which one to keep based on the user selected cancer types - is_entrez = identifier == 'entrez' + is_entrez = (identifier == 'entrez' or identifier == 'ncbigene') is_symbol = identifier == 'symbol' is_uniprot = identifier == 'uniprot' - is_ensg = identifier == 'ensg' + is_ensg = (identifier == 'ensg' or identifier == 'ensembl') if is_ensg: ensembl_set = defaultdict(set) diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index 33584febaf60379d612000c83ec170938363e4ac..1a63b069a6c1bb835ad1a64140352f997a5e2cc6 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L elif identifier == 'uniprot': protein_attribute = 'uniprot_ac' q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) - elif identifier == 'ensg': + elif identifier == 'ensg' or identifier == 'ensembl': protein_attribute = 'ensg' dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter( reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids))))) q_list = map(lambda n: Q(id=n), dr_ids) - elif identifier == 'entrez': + elif identifier == 'entrez' or identifier == 'ncbigene': protein_attribute = 'entrez' q_list = map(lambda n: Q(entrez=n), node_ids) if not node_ids: @@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L nodes = list() node_map = defaultdict(list) - if identifier == 'ensg': + if protein_attribute == 'ensg': for node in ProteinSerializer(many=True).to_representation(node_objects): for ensembl_id in node.get(protein_attribute): if ensembl_id.upper() in node_ids: @@ -67,11 +67,11 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L def get_protein_ids(id_space, proteins): if (id_space == 'uniprot'): return [p['uniprot_ac'] for p in proteins] - if (id_space == 'ensg'): + if (id_space == 'ensg' or id_space == 'ensembl'): return [p['ensg'] for p in proteins] if (id_space == 'symbol'): return [p['symbol'] for p in proteins] - if (id_space == 'entrez'): + if (id_space == 'entrez' or id_space == 'ncbigene'): return [p['entrez'] for p in proteins] return set() @@ -95,11 +95,16 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> # query protein table if len(node_ids) == 0: return list() + + symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set() + id_map = { - 'symbol:': set(), - 'uniprot:': set(), - 'ensg:': set(), - 'entrez:': set() + 'symbol:': symbol_set, + 'uniprot:': uniprot_set, + 'ensg:': ensg_set, + 'ncbigene:': entrez_set, + 'ensembl:': ensg_set, + 'entrez:': entrez_set } clean_ids = set() for node_id in node_ids: diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py index 210b82af4b1c674c1e8c9c2ef9d68c69cd24892a..5482f05ca8e6e8be0d411bdddb470ec8a4e1e150 100755 --- a/tasks/multi_steiner.py +++ b/tasks/multi_steiner.py @@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook): if ppi_dataset['licenced'] or pdi_dataset['licenced']: filename += "_licenced" filename = os.path.join(task_hook.data_directory, filename + ".gt") - print(filename) g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target) if custom_edges: