diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py index f8b423cefe5e6f7a287f94d9191b94414b87c147..bded48ddf6e1bcddbb4f7e3a5e89b3d97de78273 100644 --- a/drugstone/management/commands/import_from_nedrex.py +++ b/drugstone/management/commands/import_from_nedrex.py @@ -96,16 +96,24 @@ class NedrexImporter: if update: self.cache.init_proteins() + def format_prot_name(name): + if '{' in name: + idx1 = name.index('{') + adjusted_name = name[:idx1 - 1].strip() if idx1 > 0 else '' + if '=' in adjusted_name: + idx2 = adjusted_name.index('=') + return adjusted_name[idx2+1:].strip() + return adjusted_name + return name + def add_protein(node): id = to_id(node['primaryDomainId']) - name = node['geneName'] + name = format_prot_name(node['geneName']) + gene = name + if len(node['synonyms']) > 0: - name = node['synonyms'][0] - if '{' in name: - idx = name.index('{') - if idx > 0: - name = name[:idx - 1] - proteins[id] = models.Protein(uniprot_code=id, protein_name=name, gene=node['geneName']) + name = format_prot_name(node['synonyms'][0]) + proteins[id] = models.Protein(uniprot_code=id, protein_name=name, gene=gene) def add_edges(edge): id = to_id(edge['sourceDomainId']) diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py index 1fd5f5faf04ec08285b2c2050b08dd05211daf1f..429cf6fc97b2fbc0ba07ec9b1051188b227ae9c0 100755 --- a/drugstone/management/commands/populate_db.py +++ b/drugstone/management/commands/populate_db.py @@ -215,20 +215,20 @@ def populate(kwargs): print(f'Populated {n} DrDi associations from DrugBank.') if kwargs['protein_protein']: - # print('Importing PPIs from unlicenced NeDRexDB...') - # n = NedrexImporter.import_protein_protein_interactions(importer, - # DatasetLoader.get_ppi_nedrex(nedrex_api_url_unlicenced, False), - # update) - # total_n += n - # print(f'Imported {n} PPIs from unlicended NeDRexDB') - # print('Importing PPIs from licenced NeDRexDB...') - # n = NedrexImporter.import_protein_protein_interactions(importer, - # DatasetLoader.get_ppi_nedrex(nedrex_api_url_licenced, - # True), - # update) - # total_n += n - # nedrex_update = True - # print(f'Imported {n} PPIs from licended NeDRexDB') + print('Importing PPIs from unlicenced NeDRexDB...') + n = NedrexImporter.import_protein_protein_interactions(importer, + DatasetLoader.get_ppi_nedrex(nedrex_api_url_unlicenced, False), + update) + total_n += n + print(f'Imported {n} PPIs from unlicended NeDRexDB') + print('Importing PPIs from licenced NeDRexDB...') + n = NedrexImporter.import_protein_protein_interactions(importer, + DatasetLoader.get_ppi_nedrex(nedrex_api_url_licenced, + True), + update) + total_n += n + nedrex_update = True + print(f'Imported {n} PPIs from licended NeDRexDB') print('Populating PPIs from STRING...') n = DataPopulator.populate_ppi_string(populator, DatasetLoader.get_ppi_string(), update) total_n += n diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index 0afd48d85cfbae4432454b87ec932cfdfdab0569..39171f50a81c7d9f8daaa4ef61658d34eadd4b5a 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -24,7 +24,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L Returns name of backend attribute of Protein table """ # query protein table - if(len(node_ids) == 0): + if (len(node_ids) == 0): return list(), identifier if identifier == 'symbol': protein_attribute = 'symbol' @@ -70,7 +70,8 @@ def aggregate_nodes(nodes: List[OrderedDict]): for key, value in n.items(): if isinstance(value, list): for e in value: - node[key].add(e) - else: + if e is not None and len(e) > 0: + node[key].add(e) + elif value is not None and len(value) > 0: node[key].add(value) return {k: list(v) for k, v in node.items()}