From 5c324c85e0643663e1b44625a3af660ad17a55e3 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Thu, 23 Jun 2022 18:40:01 +0200
Subject: [PATCH] added nedrex_python support and import for protein table

---
 Dockerfile                                    |   3 +
 .../management/commands/import_from_nedrex.py | 148 ++++++++++--------
 drugstone/management/commands/populate_db.py  |   6 +-
 python_nedrex                                 |   1 +
 4 files changed, 88 insertions(+), 70 deletions(-)
 create mode 160000 python_nedrex

diff --git a/Dockerfile b/Dockerfile
index 9f236cf..aec4578 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,9 +24,12 @@ RUN pip install gunicorn
 COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 COPY ./docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh
 # COPY ./scripts/ /usr/src/drugstone/scripts/
+COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
+RUN pip install /usr/src/drugstone/python_nedrex/
 
 COPY . /usr/src/drugstone/
 
+
 #EXPOSE 8000
 
 # ENTRYPOINT ["sh", "/entrypoint.sh"]
diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index 39d0001..cc6a26b 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -1,68 +1,80 @@
-# from collections import defaultdict
-#
-#
-# def import_proteins():
-#     import python_nedrex as nedrex
-#     from python_nedrex.core import get_nodes, get_api_key, get_edges
-#     from models import Protein
-#
-#     def iter_node_collection(coll_name, eval):
-#         offset = 0
-#         limit = 10000
-#         while True:
-#             result = get_nodes(coll_name, offset=offset, limit=limit)
-#             if not result:
-#                 return
-#             for node in result:
-#                 eval(node)
-#             offset += limit
-#
-#     def iter_edge_collection(coll_name, eval):
-#         offset = 0
-#         limit = 10000
-#         while True:
-#             result = get_edges(coll_name, offset=offset, limit=limit)
-#             if not result:
-#                 return
-#             for edge in result:
-#                 eval(edge)
-#             offset += limit
-#
-#     def add_protein(node):
-#         global proteins
-#         id = node['primaryDomainId']
-#         proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName'])
-#
-#     def add_edges(edge):
-#         global proteins
-#         id = edge['sourceDomainId']
-#         protein = proteins[id]
-#         protein.entrez = edge['targetDomainId'].split('.')[1]
-#         global gene_to_prots
-#         gene_to_prots[edge['targetDomainId']].add(id)
-#
-#     def add_genes(node):
-#         global proteins
-#         global gene_to_prots
-#         id = node['primaryDomainId']
-#         for prot_id in gene_to_prots[id]:
-#             protein = proteins[prot_id]
-#             try:
-#                 protein.protein_name = node['synonyms'][0]
-#             except:
-#                 pass
-#
-#     nedrex.config.set_url_base("http://82.148.225.92:8123/")
-#     api_key = get_api_key(accept_eula=True)
-#     nedrex.config.set_api_key(api_key)
-#
-#     proteins = dict()
-#     gene_to_prots = defaultdict(lambda: set())
-#
-#     print('Importing Proteins')
-#     iter_node_collection('protein', add_protein)
-#     print('Importing Protein-Gene mapping')
-#     iter_edge_collection('protein_encoded_by_gene', add_edges)
-#     print('Mapping Gene information')
-#     iter_node_collection('gene', add_genes)
-#     Protein.objects.bulk_create(proteins.values())
+"""Import proteins from a NeDRex API instance into the drugstone database."""
+from collections import defaultdict
+
+
+class nedrex_importer:
+    """Fetch protein nodes and their gene annotations from NeDRex.
+
+    The snake_case class name is kept because populate_db.py imports it
+    under this name.
+    """
+
+    # Number of records requested per page when walking a collection.
+    PAGE_SIZE = 10000
+
+    def __init__(self):
+        # NeDRex primaryDomainId -> unsaved Protein. Kept per instance so
+        # importers do not share state through a mutable class attribute.
+        self.proteins = dict()
+
+    def import_proteins(self):
+        """Download proteins, attach gene information and bulk-insert them.
+
+        Returns:
+            int: number of Protein objects passed to ``bulk_create``.
+        """
+        # Local imports keep python_nedrex and the Django models out of
+        # module import time.
+        import python_nedrex as nedrex
+        from python_nedrex.core import get_nodes, get_api_key, get_edges
+        from drugstone.models import Protein
+
+        # Reset so that calling this method twice does not insert the
+        # proteins of the first run a second time.
+        self.proteins = dict()
+        gene_to_prots = defaultdict(set)
+
+        def iter_collection(fetch, coll_name, handle):
+            # Page through the collection until an empty page comes back.
+            offset = 0
+            while True:
+                result = fetch(coll_name, offset=offset, limit=self.PAGE_SIZE)
+                if not result:
+                    return
+                for item in result:
+                    handle(item)
+                offset += self.PAGE_SIZE
+
+        def add_protein(node):
+            domain_id = node['primaryDomainId']
+            self.proteins[domain_id] = Protein(uniprot_code=domain_id.split('.')[1], gene=node['geneName'])
+
+        def add_edges(edge):
+            domain_id = edge['sourceDomainId']
+            protein = self.proteins[domain_id]
+            protein.entrez = edge['targetDomainId'].split('.')[1]
+            gene_to_prots[edge['targetDomainId']].add(domain_id)
+
+        def add_genes(node):
+            # .get() avoids creating empty defaultdict entries for genes
+            # that no protein maps to.
+            for prot_id in gene_to_prots.get(node['primaryDomainId'], ()):
+                protein = self.proteins[prot_id]
+                try:
+                    protein.protein_name = node['synonyms'][0]
+                except (KeyError, IndexError, TypeError):
+                    # No usable first synonym on this node; keep the default.
+                    pass
+
+        nedrex.config.set_url_base("http://82.148.225.92:8123/")
+        api_key = get_api_key(accept_eula=True)
+        nedrex.config.set_api_key(api_key)
+
+        print('Importing Proteins')
+        iter_collection(get_nodes, 'protein', add_protein)
+        print('Importing Protein-Gene mapping')
+        iter_collection(get_edges, 'protein_encoded_by_gene', add_edges)
+        print('Mapping Gene information')
+        iter_collection(get_nodes, 'gene', add_genes)
+        Protein.objects.bulk_create(self.proteins.values())
+        return len(self.proteins)
diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py
index 2408eb3..a0f7693 100755
--- a/drugstone/management/commands/populate_db.py
+++ b/drugstone/management/commands/populate_db.py
@@ -6,7 +6,7 @@ from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset,
 from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
 from drugstone.management.includes.DataPopulator import DataPopulator
-
+from .import_from_nedrex import nedrex_importer
 
 
 class DatabasePopulator:
     def __init__(self, data_dir,
@@ -99,6 +99,8 @@ class Command(BaseCommand):
             tissue_expression_file=exp_file,
         )
 
+        importer = nedrex_importer()
+
         if kwargs['delete_model'] is not None:
             model_list = kwargs['delete_model'].split(',')
             db_populator.delete_models(model_list)
@@ -127,7 +129,7 @@ class Command(BaseCommand):
 
         if kwargs['proteins'] is not None:
             print('Populating Proteins...')
-            n = DataPopulator.populate_proteins(populator)
+            n = importer.import_proteins()
             print(f'Populated {n} Proteins.')
 
             print('Populating ENSG IDs...')
diff --git a/python_nedrex b/python_nedrex
new file mode 160000
index 0000000..ee1cd32
--- /dev/null
+++ b/python_nedrex
@@ -0,0 +1 @@
+Subproject commit ee1cd32fd15f6b73647df70bacb9d0ebd7858236
-- 
GitLab