From 5c324c85e0643663e1b44625a3af660ad17a55e3 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Thu, 23 Jun 2022 18:40:01 +0200
Subject: [PATCH] added nedrex_python support and import for protein table

---
 Dockerfile                                    |   3 +
 .../management/commands/import_from_nedrex.py | 137 +++++++++---------
 drugstone/management/commands/populate_db.py  |   8 +-
 python_nedrex                                 |   1 +
 4 files changed, 79 insertions(+), 70 deletions(-)
 create mode 160000 python_nedrex

diff --git a/Dockerfile b/Dockerfile
index 9f236cf..aec4578 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,9 +24,12 @@ RUN pip install gunicorn
 COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 COPY ./docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh
 # COPY ./scripts/ /usr/src/drugstone/scripts/
+COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
+RUN pip install /usr/src/drugstone/python_nedrex/
 
 COPY . /usr/src/drugstone/
 
+
 #EXPOSE 8000
 
 # ENTRYPOINT ["sh", "/entrypoint.sh"]
diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index 39d0001..cc6a26b 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -1,68 +1,69 @@
-# from collections import defaultdict
-#
-#
-# def import_proteins():
-#     import python_nedrex as nedrex
-#     from python_nedrex.core import get_nodes, get_api_key, get_edges
-#     from models import Protein
-#
-#     def iter_node_collection(coll_name, eval):
-#         offset = 0
-#         limit = 10000
-#         while True:
-#             result = get_nodes(coll_name, offset=offset, limit=limit)
-#             if not result:
-#                 return
-#             for node in result:
-#                 eval(node)
-#             offset += limit
-#
-#     def iter_edge_collection(coll_name, eval):
-#         offset = 0
-#         limit = 10000
-#         while True:
-#             result = get_edges(coll_name, offset=offset, limit=limit)
-#             if not result:
-#                 return
-#             for edge in result:
-#                 eval(edge)
-#             offset += limit
-#
-#     def add_protein(node):
-#         global proteins
-#         id = node['primaryDomainId']
-#         proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName'])
-#
-#     def add_edges(edge):
-#         global proteins
-#         id = edge['sourceDomainId']
-#         protein = proteins[id]
-#         protein.entrez = edge['targetDomainId'].split('.')[1]
-#         global gene_to_prots
-#         gene_to_prots[edge['targetDomainId']].add(id)
-#
-#     def add_genes(node):
-#         global proteins
-#         global gene_to_prots
-#         id = node['primaryDomainId']
-#         for prot_id in gene_to_prots[id]:
-#             protein = proteins[prot_id]
-#             try:
-#                 protein.protein_name = node['synonyms'][0]
-#             except:
-#                 pass
-#
-#     nedrex.config.set_url_base("http://82.148.225.92:8123/")
-#     api_key = get_api_key(accept_eula=True)
-#     nedrex.config.set_api_key(api_key)
-#
-#     proteins = dict()
-#     gene_to_prots = defaultdict(lambda: set())
-#
-#     print('Importing Proteins')
-#     iter_node_collection('protein', add_protein)
-#     print('Importing Protein-Gene mapping')
-#     iter_edge_collection('protein_encoded_by_gene', add_edges)
-#     print('Mapping Gene information')
-#     iter_node_collection('gene', add_genes)
-#     Protein.objects.bulk_create(proteins.values())
+from collections import defaultdict
+
+
+class nedrex_importer:
+    """Imports protein records from a NeDRex API into the Protein table."""
+
+    # Class-level default so `proteins` stays readable on the class itself;
+    # import_proteins() rebinds it to a fresh dict on every run.
+    proteins = dict()
+
+    def import_proteins(self, url_base="http://82.148.225.92:8123/"):
+        """Fetch proteins plus gene annotations and bulk-insert them.
+
+        :param url_base: base URL of the NeDRex API to query
+        :return: number of Protein objects passed to bulk_create
+        """
+        import python_nedrex as nedrex
+        from python_nedrex.core import get_nodes, get_api_key, get_edges
+        from drugstone.models import Protein
+
+        # Start empty so a repeated run does not insert stale entries again.
+        self.proteins = {}
+        gene_to_prots = defaultdict(set)
+
+        def iter_collection(fetch, coll_name, handle, limit=10000):
+            # Page through the collection until an empty page is returned.
+            offset = 0
+            while True:
+                page = fetch(coll_name, offset=offset, limit=limit)
+                if not page:
+                    return
+                for item in page:
+                    handle(item)
+                offset += limit
+
+        def add_protein(node):
+            domain_id = node['primaryDomainId']
+            self.proteins[domain_id] = Protein(
+                uniprot_code=domain_id.split('.')[1], gene=node['geneName'])
+
+        def add_edges(edge):
+            protein_id = edge['sourceDomainId']
+            gene_id = edge['targetDomainId']
+            self.proteins[protein_id].entrez = gene_id.split('.')[1]
+            gene_to_prots[gene_id].add(protein_id)
+
+        def add_genes(node):
+            # .get() avoids growing the defaultdict for genes without proteins.
+            prot_ids = gene_to_prots.get(node['primaryDomainId'])
+            # Naming is best effort: genes without a usable synonym are skipped.
+            synonyms = node.get('synonyms')
+            if not prot_ids or not synonyms:
+                return
+            for prot_id in prot_ids:
+                self.proteins[prot_id].protein_name = synonyms[0]
+
+        # Point the client at the requested API and accept its EULA for a key.
+        nedrex.config.set_url_base(url_base)
+        api_key = get_api_key(accept_eula=True)
+        nedrex.config.set_api_key(api_key)
+
+        print('Importing Proteins')
+        iter_collection(get_nodes, 'protein', add_protein)
+        print('Importing Protein-Gene mapping')
+        iter_collection(get_edges, 'protein_encoded_by_gene', add_edges)
+        print('Mapping Gene information')
+        iter_collection(get_nodes, 'gene', add_genes)
+        Protein.objects.bulk_create(self.proteins.values())
+        return len(self.proteins)
diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py
index 2408eb3..a0f7693 100755
--- a/drugstone/management/commands/populate_db.py
+++ b/drugstone/management/commands/populate_db.py
@@ -6,7 +6,7 @@ from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset,
 from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
 
 from drugstone.management.includes.DataPopulator import DataPopulator
-
+from .import_from_nedrex import nedrex_importer
 
 class DatabasePopulator:
     def __init__(self, data_dir,
@@ -99,6 +99,8 @@ class Command(BaseCommand):
                                         tissue_expression_file=exp_file,
                                         )
 
+        importer = nedrex_importer()
+
         if kwargs['delete_model'] is not None:
             model_list = kwargs['delete_model'].split(',')
             db_populator.delete_models(model_list)
@@ -127,7 +129,9 @@ class Command(BaseCommand):
 
         if kwargs['proteins'] is not None:
             print('Populating Proteins...')
-            n = DataPopulator.populate_proteins(populator)
+
+            n = importer.import_proteins()
+            # Replaces the former DataPopulator.populate_proteins(populator) call.
             print(f'Populated {n} Proteins.')
             
             print('Populating ENSG IDs...')
diff --git a/python_nedrex b/python_nedrex
new file mode 160000
index 0000000..ee1cd32
--- /dev/null
+++ b/python_nedrex
@@ -0,0 +1 @@
+Subproject commit ee1cd32fd15f6b73647df70bacb9d0ebd7858236
-- 
GitLab