Skip to content
Snippets Groups Projects
Commit f696e9b8 authored by AndiMajore's avatar AndiMajore
Browse files

added id_space options

Former-commit-id: 25b74e80
parent 276f374f
No related branches found
No related tags found
No related merge requests found
......@@ -14,12 +14,16 @@ qr_r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
decode_responses=False)
rq_tasks = rq.Queue('drugstone_tasks', connection=qr_r)
r = redis.Redis(host=os.getenv('REDIS_HOST', 'redis'),
port=os.getenv('REDIS_PORT', 6379),
db=0,
decode_responses=True)
# Maps externally-accepted identifier-space names to the internal
# names used throughout the pipeline ('ensembl' -> 'ensg',
# 'ncbigene' -> 'entrez'); unknown names pass through unchanged
# via identifier_map.get(x, x) at the call site.
identifier_map = {
    'ensembl': 'ensg',
    'ncbigene': 'entrez'
}
def run_task(token, algorithm, parameters):
def set_progress(progress, status):
......@@ -41,7 +45,13 @@ def run_task(token, algorithm, parameters):
r.set(f'{token}_job_id', f'{job_id}')
r.set(f'{token}_started_at', str(datetime.now().timestamp()))
task_hook = TaskHook(json.loads(parameters), './data/Networks/', set_progress, set_result)
params = json.loads(parameters)
params['config']['identifier'] = identifier_map.get(params['config']['identifier'], params['config']['identifier'])
task_hook = TaskHook(params, './data/Networks/', set_progress, set_result)
task_hook.parameters["config"].get("identifier", "symbol")
try:
if algorithm == 'dummy':
......@@ -100,7 +110,7 @@ def refresh_from_redis(task):
def start_task(task):
job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30*60)
job = rq_tasks.enqueue(run_task, task.token, task.algorithm, task.parameters, job_timeout=30 * 60)
task.job_id = job.id
......
......@@ -122,10 +122,10 @@ def create_gt(params: List[str]) -> None:
# extend node data by cancer nodes, we create a normal node for each cancer node.
# on reading the data, we decide which one to keep based on the user selected cancer types
is_entrez = identifier == 'entrez'
is_entrez = (identifier == 'entrez' or identifier == 'ncbigene')
is_symbol = identifier == 'symbol'
is_uniprot = identifier == 'uniprot'
is_ensg = identifier == 'ensg'
is_ensg = (identifier == 'ensg' or identifier == 'ensembl')
if is_ensg:
ensembl_set = defaultdict(set)
......
......@@ -32,12 +32,12 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
elif identifier == 'uniprot':
protein_attribute = 'uniprot_ac'
q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids)
elif identifier == 'ensg':
elif identifier == 'ensg' or identifier == 'ensembl':
protein_attribute = 'ensg'
dr_ids = map(lambda n: n.protein_id, EnsemblGene.objects.filter(
reduce(lambda a, b: a | b, map(lambda n: Q(name__iexact=n), list(node_ids)))))
q_list = map(lambda n: Q(id=n), dr_ids)
elif identifier == 'entrez':
elif identifier == 'entrez' or identifier == 'ncbigene':
protein_attribute = 'entrez'
q_list = map(lambda n: Q(entrez=n), node_ids)
if not node_ids:
......@@ -48,7 +48,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
nodes = list()
node_map = defaultdict(list)
if identifier == 'ensg':
if protein_attribute == 'ensg':
for node in ProteinSerializer(many=True).to_representation(node_objects):
for ensembl_id in node.get(protein_attribute):
if ensembl_id.upper() in node_ids:
......@@ -67,11 +67,11 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L
def get_protein_ids(id_space, proteins):
    """Collect the identifiers of *proteins* in the requested ID space.

    Args:
        id_space: Identifier namespace. Recognised values are
            'uniprot', 'symbol', 'ensg'/'ensembl' and
            'entrez'/'ncbigene' — each alias pair resolves to the
            same record attribute.
        proteins: Iterable of dict-like protein records carrying the
            keys 'uniprot_ac', 'ensg', 'symbol' and 'entrez'.

    Returns:
        list: The identifier values, in input order, for a recognised
        id_space. For an unrecognised id_space an empty set() is
        returned — inconsistent with the list returns, but preserved
        for backward compatibility with callers that only rely on the
        fallback being an empty (falsy) collection.
    """
    # Alias table replaces the original duplicated if-chain: every
    # accepted id-space name maps to the record key it selects.
    key_by_space = {
        'uniprot': 'uniprot_ac',
        'ensg': 'ensg',
        'ensembl': 'ensg',
        'symbol': 'symbol',
        'entrez': 'entrez',
        'ncbigene': 'entrez',
    }
    key = key_by_space.get(id_space)
    if key is None:
        # Legacy fallback value; kept byte-for-byte (see docstring).
        return set()
    return [p[key] for p in proteins]
......@@ -95,11 +95,16 @@ def clean_proteins_from_compact_notation(node_ids: Set[str], identifier: str) ->
# query protein table
if len(node_ids) == 0:
return list()
symbol_set, ensg_set, uniprot_set, entrez_set = set(), set(), set(), set()
id_map = {
'symbol:': set(),
'uniprot:': set(),
'ensg:': set(),
'entrez:': set()
'symbol:': symbol_set,
'uniprot:': uniprot_set,
'ensg:': ensg_set,
'ncbigene:': entrez_set,
'ensembl:': ensg_set,
'entrez:': entrez_set
}
clean_ids = set()
for node_id in node_ids:
......
......@@ -115,7 +115,6 @@ def multi_steiner(task_hook: TaskHook):
if ppi_dataset['licenced'] or pdi_dataset['licenced']:
filename += "_licenced"
filename = os.path.join(task_hook.data_directory, filename + ".gt")
print(filename)
g, seed_ids, _ = read_graph_tool_graph(filename, seeds, id_space, max_deg, target=search_target)
if custom_edges:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment