Skip to content
Snippets Groups Projects
Commit e70bba60 authored by AndiMajore's avatar AndiMajore
Browse files

added nedrex_python support and import for protein table

Former-commit-id: 1c68634f15ccb66ef4804394bae4f79e2afb39d7 [formerly 6571f711d6b4dc06abe8b04b32e560850f759f5d]
Former-commit-id: ba0caed5921ec5463bedc96ef2fd2e2f48f0f1d2
parent ca0e5a7b
No related branches found
No related tags found
No related merge requests found
......@@ -24,9 +24,12 @@ RUN pip install gunicorn
# Process-supervisor configuration and the container entrypoint script.
COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY ./docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh
# COPY ./scripts/ /usr/src/drugstone/scripts/
# Copy the bundled python_nedrex package on its own and pip-install it from
# that directory before the full source tree is copied below.
# NOTE(review): presumably a git submodule that must be checked out before
# building the image - confirm.
COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
RUN pip install /usr/src/drugstone/python_nedrex/
# Copy the complete build context (application source) into the image.
COPY . /usr/src/drugstone/
#EXPOSE 8000
# ENTRYPOINT ["sh", "/entrypoint.sh"]
# from collections import defaultdict
#
#
# def import_proteins():
# import python_nedrex as nedrex
# from python_nedrex.core import get_nodes, get_api_key, get_edges
# from models import Protein
#
# def iter_node_collection(coll_name, eval):
# offset = 0
# limit = 10000
# while True:
# result = get_nodes(coll_name, offset=offset, limit=limit)
# if not result:
# return
# for node in result:
# eval(node)
# offset += limit
#
# def iter_edge_collection(coll_name, eval):
# offset = 0
# limit = 10000
# while True:
# result = get_edges(coll_name, offset=offset, limit=limit)
# if not result:
# return
# for edge in result:
# eval(edge)
# offset += limit
#
# def add_protein(node):
# global proteins
# id = node['primaryDomainId']
# proteins[id] = Protein(uniprot_code=id.split('.')[1], gene=node['geneName'])
#
# def add_edges(edge):
# global proteins
# id = edge['sourceDomainId']
# protein = proteins[id]
# protein.entrez = edge['targetDomainId'].split('.')[1]
# global gene_to_prots
# gene_to_prots[edge['targetDomainId']].add(id)
#
# def add_genes(node):
# global proteins
# global gene_to_prots
# id = node['primaryDomainId']
# for prot_id in gene_to_prots[id]:
# protein = proteins[prot_id]
# try:
# protein.protein_name = node['synonyms'][0]
# except:
# pass
#
# nedrex.config.set_url_base("http://82.148.225.92:8123/")
# api_key = get_api_key(accept_eula=True)
# nedrex.config.set_api_key(api_key)
#
# proteins = dict()
# gene_to_prots = defaultdict(lambda: set())
#
# print('Importing Proteins')
# iter_node_collection('protein', add_protein)
# print('Importing Protein-Gene mapping')
# iter_edge_collection('protein_encoded_by_gene', add_edges)
# print('Mapping Gene information')
# iter_node_collection('gene', add_genes)
# Protein.objects.bulk_create(proteins.values())
from collections import defaultdict
class nedrex_importer:
    """Import protein records from a NeDRex API into the ``Protein`` table.

    Proteins are read from the ``protein`` node collection, linked to genes
    through the ``protein_encoded_by_gene`` edge collection, and named from
    the first synonym of the matching ``gene`` node.
    """

    # Kept as a class attribute for backward compatibility: existing callers
    # invoke ``nedrex_importer.import_proteins(nedrex_importer)``, i.e. they
    # pass the class itself as ``self`` and read ``proteins`` from it.
    proteins = dict()

    def import_proteins(self, url_base="http://82.148.225.92:8123/"):
        """Fetch all proteins from NeDRex and bulk-insert them.

        Args:
            url_base: Base URL of the NeDRex API. Defaults to the server the
                importer was originally hard-wired to.

        Returns:
            The number of ``Protein`` objects handed to ``bulk_create``.

        Raises:
            KeyError: If a protein node lacks ``primaryDomainId`` or
                ``geneName``, or if an edge refers to a protein that is not
                part of the ``protein`` collection.
        """
        import python_nedrex as nedrex
        from python_nedrex.core import get_nodes, get_api_key, get_edges
        from drugstone.models import Protein

        page_size = 10000

        # Start every run from an empty mapping. Without this, objects left
        # over from a previous run (already saved) would be submitted to
        # bulk_create a second time.
        self.proteins = {}
        proteins = self.proteins
        # Maps a gene domain id to the domain ids of the proteins it encodes.
        gene_to_prots = defaultdict(set)

        def iter_collection(fetch, coll_name, handle):
            """Page through ``coll_name`` with ``fetch`` and pass each item to ``handle``."""
            offset = 0
            while True:
                page = fetch(coll_name, offset=offset, limit=page_size)
                if not page:
                    # An empty page marks the end of the collection.
                    return
                for item in page:
                    handle(item)
                offset += page_size

        def add_protein(node):
            """Create an unsaved Protein for a NeDRex protein node."""
            domain_id = node['primaryDomainId']
            # The second dot-separated field of the domain id is stored as
            # the UniProt code.
            proteins[domain_id] = Protein(
                uniprot_code=domain_id.split('.')[1],
                gene=node['geneName'],
            )

        def add_edges(edge):
            """Record the encoding gene for the protein at the edge's source."""
            protein_id = edge['sourceDomainId']
            gene_id = edge['targetDomainId']
            proteins[protein_id].entrez = gene_id.split('.')[1]
            gene_to_prots[gene_id].add(protein_id)

        def add_genes(node):
            """Set the protein name from the gene's first synonym, if any."""
            # .get() instead of indexing: indexing a defaultdict would insert
            # an empty set for every gene that encodes none of our proteins.
            protein_ids = gene_to_prots.get(node['primaryDomainId'], ())
            if not protein_ids:
                return
            try:
                name = node['synonyms'][0]
            except (KeyError, IndexError, TypeError):
                # Best effort: genes without a usable synonym list leave the
                # protein name unset.
                return
            for protein_id in protein_ids:
                proteins[protein_id].protein_name = name

        nedrex.config.set_url_base(url_base)
        api_key = get_api_key(accept_eula=True)
        nedrex.config.set_api_key(api_key)

        print('Importing Proteins')
        iter_collection(get_nodes, 'protein', add_protein)
        print('Importing Protein-Gene mapping')
        iter_collection(get_edges, 'protein_encoded_by_gene', add_edges)
        print('Mapping Gene information')
        iter_collection(get_nodes, 'gene', add_genes)

        Protein.objects.bulk_create(proteins.values())
        return len(proteins)
......@@ -6,7 +6,7 @@ from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset,
from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
from drugstone.management.includes.DataPopulator import DataPopulator
from .import_from_nedrex import nedrex_importer
class DatabasePopulator:
def __init__(self, data_dir,
......@@ -99,6 +99,8 @@ class Command(BaseCommand):
tissue_expression_file=exp_file,
)
importer = nedrex_importer()
if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list)
......@@ -127,7 +129,9 @@ class Command(BaseCommand):
if kwargs['proteins'] is not None:
print('Populating Proteins...')
n = DataPopulator.populate_proteins(populator)
n = nedrex_importer.import_proteins(nedrex_importer)
# n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...')
......
Subproject commit ee1cd32fd15f6b73647df70bacb9d0ebd7858236
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment