Skip to content
Snippets Groups Projects
Commit f696e9b8 authored by AndiMajore's avatar AndiMajore
Browse files

added id_space options

Former-commit-id: 25b74e80
parent 276f374f
No related branches found
No related tags found
No related merge requests found
...@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), ...@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
decode_responses=False) decode_responses=False)
rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r) rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r)
r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'), r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
port=os.getenv('REDIS_PORT', 6379), port=os.getenv('REDIS_PORT', 6379),
db=0, db=0,
decode_responses=True) decode_responses=True)
# Aliases for externally-facing ID-space names, mapped onto the
# identifier names used internally by the task pipeline.
identifier_map = dict(
    ensembl='ensg',
    ncbigene='entrez',
)
def run_task(token, algorithm, parameters): def run_task(token, algorithm, parameters):
def set_progress(progress, status): def set_progress(progress, status):
...@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters): ...@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters):
r.set(f'{token}_job_id', f'{job_id}') r.set(f'{token}_job_id', f'{job_id}')
r.set(f'{token}_started_at', str(datetime.now().timestamp())) r.set(f'{token}_started_at', str(datetime.now().timestamp()))
task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result) params = json.loads(parameters)
params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier'])
task_hook = TaskHook(params, './data/Networks/', set_progress, set_result)
task_hook.parameters["config"].get("identifier", "symbol")
try: try:
if algorithm == 'dummy': if algorithm == 'dummy':
...@@ -100,7 +110,7 @@ def refresh_from_redis(task): ...@@ -100,7 +110,7 @@ def refresh_from_redis(task):
def start_task(task): def start_task(task):
job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30*60) job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30 * 60)
task.job_id = job.id task.job_id = job.id
......
...@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None: ...@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None:
# extend node data by cancer nodes, we create a normal node for each cancer node. # extend node data by cancer nodes, we create a normal node for each cancer node.
# on reading the data, we decide which one to keep based on the user selected cancer types # on reading the data, we decide which one to keep based on the user selected cancer types
is_entrez = identifier == 'entrez' is_entrez = (identifier == 'entrez' or identifier == 'ncbigene')
is_symbol = identifier == 'symbol' is_symbol = identifier == 'symbol'
is_uniprot = identifier == 'uniprot' is_uniprot = identifier == 'uniprot'
is_ensg = identifier == 'ensg' is_ensg = (identifier == 'ensg' or identifier == 'ensembl')
if is_ensg: if is_ensg:
ensembl_set = defaultdict(set) ensembl_set = defaultdict(set)
......
...@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
elif identifier == 'uniprot': elif identifier == 'uniprot':
protein_attribute = 'uniprot_ac' protein_attribute = 'uniprot_ac'
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg': elif identifier == 'ensg' or identifier == 'ensembl':
protein_attribute = 'ensg' protein_attribute = 'ensg'
dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter( dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids))))) reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), dr_ids) q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez': elif identifier == 'entrez' or identifier == 'ncbigene':
protein_attribute = 'entrez' protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids) q_list = map(lambda n: Q(entrez=n), node_ids)
if not node_ids: if not node_ids:
...@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
nodes = list() nodes = list()
node_map = defaultdict(list) node_map = defaultdict(list)
if identifier == 'ensg': if protein_attribute == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects): for node in ProteinSerializer(many=True).to_representation(node_objects):
for ensembl_id in node.get(protein_attribute): for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids: if ensembl_id.upper() in node_ids:
...@@ -67,11 +67,11 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L ...@@ -67,11 +67,11 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
def get_protein_ids(id_space, proteins):
    """Return the identifiers of ``proteins`` in the requested ID space.

    Parameters:
        id_space: one of 'uniprot', 'ensg'/'ensembl', 'symbol',
            'entrez'/'ncbigene' (alias pairs map to the same attribute).
        proteins: iterable of serialized protein dicts that carry the
            corresponding attribute key.

    Returns:
        A list of the mapped identifier values, or an empty ``set()`` for
        an unknown id_space (kept as ``set()`` — not ``[]`` — for backward
        compatibility with existing callers; NOTE(review): the mixed
        list/set return type looks accidental — confirm before unifying).
    """
    # Dispatch table replaces the repeated if-chain; alias id spaces
    # ('ensembl', 'ncbigene') share the key of their canonical form.
    key_by_space = {
        'uniprot': 'uniprot_ac',
        'ensg': 'ensg',
        'ensembl': 'ensg',
        'symbol': 'symbol',
        'entrez': 'entrez',
        'ncbigene': 'entrez',
    }
    key = key_by_space.get(id_space)
    if key is None:
        # Preserve the original fallback value for unknown id spaces.
        return set()
    return [p[key] for p in proteins]
...@@ -95,11 +95,16 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) -> ...@@ -95,11 +95,16 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) ->
# query protein table # query protein table
if len(node_ids) == 0: if len(node_ids) == 0:
return list() return list()
symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set()
id_map = { id_map = {
'symbol:': set(), 'symbol:': symbol_set,
'uniprot:': set(), 'uniprot:': uniprot_set,
'ensg:': set(), 'ensg:': ensg_set,
'entrez:': set() 'ncbigene:': entrez_set,
'ensembl:': ensg_set,
'entrez:': entrez_set
} }
clean_ids = set() clean_ids = set()
for node_id in node_ids: for node_id in node_ids:
......
...@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook): ...@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook):
if ppi_dataset['licenced'] or pdi_dataset['licenced']: if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced" filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt") filename = os.path.join(task_hook.data_directory, filename + ".gt")
print(filename)
g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target) g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
if custom_edges: if custom_edges:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment