Skip to content
Snippets Groups Projects
Commit e70bba60 authored by AndiMajore
Browse files

added nedrex_python support and import for protein table

Former-commit-id: 1c68634f15ccb66ef4804394bae4f79e2afb39d7 [formerly 6571f711d6b4dc06abe8b04b32e560850f759f5d]
Former-commit-id: ba0caed5921ec5463bedc96ef2fd2e2f48f0f1d2
parent ca0e5a7b
No related branches found
No related tags found
No related merge requests found
...@@ -24,9 +24,12 @@ RUN pip install gunicorn ...@@ -24,9 +24,12 @@ RUN pip install gunicorn
COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY ./docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh COPY ./docker-entrypoint.sh /usr/src/drugstone/docker-entrypoint.sh
# COPY ./scripts/ /usr/src/drugstone/scripts/ # COPY ./scripts/ /usr/src/drugstone/scripts/
COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
RUN pip install /usr/src/drugstone/python_nedrex/
COPY . /usr/src/drugstone/ COPY . /usr/src/drugstone/
#EXPOSE 8000 #EXPOSE 8000
# ENTRYPOINT ["sh", "/entrypoint.sh"] # ENTRYPOINT ["sh", "/entrypoint.sh"]
from collections import defaultdict


class nedrex_importer:
    """Import protein records from a NeDRex API instance into the database.

    The importer pages through the NeDRex ``protein`` and ``gene`` node
    collections and the ``protein_encoded_by_gene`` edge collection, builds
    one ``Protein`` model object per protein node and stores them with a
    single ``bulk_create``.
    """

    # NeDRex endpoint used when ``import_proteins`` is called without a URL.
    default_url_base = "http://82.148.225.92:8123/"

    # Maps a NeDRex protein id to its (unsaved) ``Protein`` object.
    # Kept as a class attribute so that the importer also works when the
    # class itself is passed as ``self`` (as existing callers do).
    proteins = dict()

    @staticmethod
    def _iter_collection(fetch, coll_name, handle, limit=10000):
        """Call ``handle`` on every item of a paginated NeDRex collection.

        ``fetch`` is invoked as ``fetch(coll_name, offset=..., limit=...)``
        with an increasing offset until it returns an empty/falsy page.
        """
        offset = 0
        while True:
            page = fetch(coll_name, offset=offset, limit=limit)
            if not page:
                return
            for item in page:
                handle(item)
            offset += limit

    def import_proteins(self, url_base=None):
        """Fetch all proteins from NeDRex and bulk-insert them.

        Args:
            url_base: Base URL of the NeDRex API. Defaults to
                ``default_url_base`` when omitted.

        Returns:
            The number of ``Protein`` objects that were created.

        Raises:
            KeyError: If a node lacks ``primaryDomainId``/``geneName`` or an
                edge refers to a protein that was not seen in the ``protein``
                collection.
        """
        # Imported lazily so that the module can be loaded without the
        # NeDRex client or a configured Django environment.
        import python_nedrex as nedrex
        from python_nedrex.core import get_nodes, get_api_key, get_edges
        from drugstone.models import Protein

        if url_base is None:
            url_base = self.default_url_base

        # Start every run from an empty mapping so repeated imports do not
        # carry over objects from a previous run.
        self.proteins = dict()
        proteins = self.proteins
        # gene id -> ids of the proteins encoded by that gene
        gene_to_prots = defaultdict(set)

        def add_protein(node):
            node_id = node['primaryDomainId']
            # Ids are split on '.'; the part after the first dot is used as
            # the UniProt code (presumably "<namespace>.<accession>").
            proteins[node_id] = Protein(
                uniprot_code=node_id.split('.')[1],
                gene=node['geneName'],
            )

        def add_edge(edge):
            source_id = edge['sourceDomainId']
            target_id = edge['targetDomainId']
            proteins[source_id].entrez = target_id.split('.')[1]
            gene_to_prots[target_id].add(source_id)

        def add_gene(node):
            gene_id = node['primaryDomainId']
            # .get() avoids inserting an empty set for every unmapped gene.
            for prot_id in gene_to_prots.get(gene_id, ()):
                try:
                    proteins[prot_id].protein_name = node['synonyms'][0]
                except (KeyError, IndexError, TypeError):
                    # Gene has no usable synonym; leave the name unset.
                    pass

        nedrex.config.set_url_base(url_base)
        api_key = get_api_key(accept_eula=True)
        nedrex.config.set_api_key(api_key)

        print('Importing Proteins')
        self._iter_collection(get_nodes, 'protein', add_protein)
        print('Importing Protein-Gene mapping')
        self._iter_collection(get_edges, 'protein_encoded_by_gene', add_edge)
        print('Mapping Gene information')
        self._iter_collection(get_nodes, 'gene', add_gene)
        Protein.objects.bulk_create(proteins.values())
        return len(proteins)
...@@ -6,7 +6,7 @@ from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset, ...@@ -6,7 +6,7 @@ from drugstone.models import Protein, Drug, Tissue, ExpressionLevel, PPIDataset,
from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction from drugstone.models import ProteinProteinInteraction, ProteinDrugInteraction
from drugstone.management.includes.DataPopulator import DataPopulator from drugstone.management.includes.DataPopulator import DataPopulator
from .import_from_nedrex import nedrex_importer
class DatabasePopulator: class DatabasePopulator:
def __init__(self, data_dir, def __init__(self, data_dir,
...@@ -99,6 +99,8 @@ class Command(BaseCommand): ...@@ -99,6 +99,8 @@ class Command(BaseCommand):
tissue_expression_file=exp_file, tissue_expression_file=exp_file,
) )
importer = nedrex_importer()
if kwargs['delete_model'] is not None: if kwargs['delete_model'] is not None:
model_list = kwargs['delete_model'].split(',') model_list = kwargs['delete_model'].split(',')
db_populator.delete_models(model_list) db_populator.delete_models(model_list)
...@@ -127,7 +129,9 @@ class Command(BaseCommand): ...@@ -127,7 +129,9 @@ class Command(BaseCommand):
if kwargs['proteins'] is not None: if kwargs['proteins'] is not None:
print('Populating Proteins...') print('Populating Proteins...')
n = DataPopulator.populate_proteins(populator)
n = nedrex_importer.import_proteins(nedrex_importer)
# n = DataPopulator.populate_proteins(populator)
print(f'Populated {n} Proteins.') print(f'Populated {n} Proteins.')
print('Populating ENSG IDs...') print('Populating ENSG IDs...')
......
Subproject commit ee1cd32fd15f6b73647df70bacb9d0ebd7858236
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment