diff --git a/docker-django.env.dev b/docker-django.env.dev
index c0f3aa4316170325e46f3794e79220b655873414..66f78b1f970a4ac073d22ccfba4b36d54559d5bc 100644
--- a/docker-django.env.dev
+++ b/docker-django.env.dev
@@ -14,4 +14,4 @@ DJANGO_SETTINGS_MODULE=drugstone.settings
 CELERY_BROKER_URL=redis://redis:6379/0
 FLOWER_PORT=8888
 FLOWER_BASIC_AUTH=drugstone:test
-GT_THREADS=1
\ No newline at end of file
+GT_THREADS=8
\ No newline at end of file
diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py
index 29c3cbb481ab72f4a2d4f94353607c81a75253ac..f8b423cefe5e6f7a287f94d9191b94414b87c147 100644
--- a/drugstone/management/commands/import_from_nedrex.py
+++ b/drugstone/management/commands/import_from_nedrex.py
@@ -151,6 +151,7 @@ class NedrexImporter:
     def import_drugs(self, update):
         self.set_licenced(False)
+        drugs = dict()
         if update:
             self.cache.init_drugs()
@@ -237,14 +238,18 @@ class NedrexImporter:
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
                 for source in edge['dataSources']:
-                    if not licenced or is_licenced(source):
-                        bulk.add(models.ProteinDrugInteraction(pdi_dataset=get_dataset(source), drug=drug,
+                    if licenced:
+                        if not is_licenced(source):
+                            continue
+                    bulk.add(models.ProteinDrugInteraction(pdi_dataset=get_dataset(source), drug=drug,
                                                            protein=protein))
             except KeyError:
                 pass
 
         iter_edge_collection('drug_has_target', add_dpi)
         models.ProteinDrugInteraction.objects.bulk_create(bulk)
+        new_datasets = [dataset, *source_datasets.values()]
+        DatasetLoader.remove_old_pdi_data(new_datasets, licenced)
         return len(bulk)
 
     def import_protein_protein_interactions(self, dataset: PPIDataset, update):
@@ -292,15 +297,19 @@ class NedrexImporter:
                 if not update or e.__hash__() not in existing:
                     bulk.append(e)
                 for source in edge['dataSources']:
-                    if not licenced or is_licenced(source):
-                        bulk.append(
-                            models.ProteinProteinInteraction(ppi_dataset=get_dataset(source), from_protein=protein1,
-                                                             to_protein=protein2))
+                    if licenced:
+                        if not is_licenced(source):
+                            continue
+                    bulk.append(
+                        models.ProteinProteinInteraction(ppi_dataset=get_dataset(source), from_protein=protein1,
+                                                         to_protein=protein2))
             except KeyError:
                 pass
 
         iter_ppi(add_ppi)
         models.ProteinProteinInteraction.objects.bulk_create(bulk)
+        new_datasets = [dataset, *source_datasets.values()]
+        DatasetLoader.remove_old_ppi_data(new_datasets, licenced)
         return len(bulk)
 
     def import_protein_disorder_associations(self, dataset, update):
@@ -338,8 +347,10 @@ class NedrexImporter:
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
                 for source in edge['dataSources']:
-                    if not licenced or is_licenced(source):
-                        bulk.add(
+                    if licenced:
+                        if not is_licenced(source):
+                            continue
+                    bulk.add(
                         models.ProteinDisorderAssociation(pdis_dataset=get_dataset(source), protein=protein,
                                                           disorder=disorder,
                                                           score=edge['score']))
             except KeyError:
@@ -348,6 +359,8 @@ class NedrexImporter:
 
         iter_edge_collection('gene_associated_with_disorder', add_pdis)
         models.ProteinDisorderAssociation.objects.bulk_create(bulk)
+        new_datasets = [dataset, *source_datasets.values()]
+        DatasetLoader.remove_old_pdis_data(new_datasets, licenced)
         return len(bulk)
 
     def import_drug_disorder_indications(self, dataset, update):
@@ -384,8 +397,10 @@ class NedrexImporter:
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
                 for source in edge['dataSources']:
-                    if not licenced or is_licenced(source):
-                        bulk.add(
+                    if licenced:
+                        if not is_licenced(source):
+                            continue
+                    bulk.add(
                         models.DrugDisorderIndication(drdi_dataset=get_dataset(source), drug=drug,
                                                       disorder=disorder))
             except KeyError:
@@ -393,4 +408,6 @@ class NedrexImporter:
 
         iter_edge_collection('drug_has_indication', add_drdis)
         models.DrugDisorderIndication.objects.bulk_create(bulk)
+        new_datasets = [dataset, *source_datasets.values()]
+        DatasetLoader.remove_old_drdi_data(new_datasets, licenced)
         return len(bulk)
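Review note on the import hunks above: the rewritten guard is behaviour-preserving, since `if not licenced or is_licenced(source)` and the early-continue form admit exactly the same sources; the licence check simply no longer runs for unlicenced imports. Each import now also ends with a cleanup step. A minimal sketch of that call, assuming `source_datasets` is the name-to-dataset cache filled by `get_dataset` inside the importer:

```python
# Sketch, not part of the patch: the cleanup list must contain dataset
# objects, so the per-source datasets are unpacked next to the main one.
new_datasets = [dataset, *source_datasets.values()]
DatasetLoader.remove_old_pdi_data(new_datasets, licenced)
```

Without the unpacking, the helper would receive one nested `dict_values` entry and silently skip it in its except branch, leaving the per-source datasets uncleaned.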
diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py
index e08a6fe61e6bff90ea103148900e4e689e037289..ad23f090d31a05b8a509d36cf01821f16c1ed7b2 100644
--- a/drugstone/management/includes/DatasetLoader.py
+++ b/drugstone/management/includes/DatasetLoader.py
@@ -45,6 +45,15 @@ def get_nedrex_version():
     return version
 
 
+def get_nedrex_source_version(source):
+    metadata = get_metadata()['source_databases']
+    # TODO remove once fixed in nedrex db
+    if 'drug_central' in metadata:
+        metadata['drugcentral'] = metadata['drug_central']
+
+    return metadata[source]['date']
+
+
 def get_drug_target_nedrex(url, licenced):
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='NeDRex',
@@ -65,7 +74,7 @@ def get_ppi_nedrex(url, licenced):
     return dataset
 
 
-def get_protein_disorder_nedrex(url,licenced):
+def get_protein_disorder_nedrex(url, licenced):
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
@@ -75,7 +84,7 @@ def get_protein_disorder_nedrex(url, licenced):
     return dataset
 
 
-def get_drug_disorder_nedrex(url,licenced):
+def get_drug_disorder_nedrex(url, licenced):
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
@@ -86,7 +95,7 @@ def get_drug_disorder_nedrex(url, licenced):
 
 
 def write_license(text):
-    with open(LICENSE_FILE,'w') as fh:
+    with open(LICENSE_FILE, 'w') as fh:
         fh.write(text)
@@ -172,7 +181,7 @@ def get_today_version():
 def get_ppi_nedrex_dataset(url, licenced, source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -188,7 +197,7 @@ def get_ppi_nedrex_dataset(url, licenced, source):
 def get_pdi_nedrex_dataset(url, licenced, source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -200,10 +209,11 @@ def get_pdi_nedrex_dataset(url, licenced, source):
     )
     return dataset
 
+
 def get_pdis_nedrex_dataset(url, licenced, source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -215,10 +225,11 @@ def get_pdis_nedrex_dataset(url, licenced, source):
     )
     return dataset
 
+
 def get_drdi_nedrex_dataset(url, licenced, source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -230,10 +241,11 @@ def get_drdi_nedrex_dataset(url, licenced, source):
     )
     return dataset
 
+
 def is_licenced_ppi_source(source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -243,10 +255,11 @@ def is_licenced_ppi_source(source):
         return True
     return False
 
+
 def is_licenced_pdi_source(source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -256,10 +269,11 @@ def is_licenced_pdi_source(source):
         return True
     return False
 
+
 def is_licenced_pdis_source(source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -269,10 +283,11 @@ def is_licenced_pdis_source(source):
         return True
     return False
 
+
 def is_licenced_drdi_source(source):
     version = get_today_version()
     try:
-        version = get_metadata()['source_databases'][source]['date']
+        version = get_nedrex_source_version(source)
     except RetryError:
         pass
 
@@ -281,3 +296,43 @@ def is_licenced_drdi_source(source):
     except:
         return True
     return False
+
+
+def remove_old_pdi_data(new_datasets, licenced):
+    for dataset in new_datasets:
+        try:
+            for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced):
+                if d != dataset:
+                    d.delete()
+        except:
+            continue
+
+
+def remove_old_ppi_data(new_datasets, licenced):
+    for dataset in new_datasets:
+        try:
+            for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced):
+                if d != dataset:
+                    d.delete()
+        except:
+            continue
+
+
+def remove_old_pdis_data(new_datasets, licenced):
+    for dataset in new_datasets:
+        try:
+            for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced):
+                if d != dataset:
+                    d.delete()
+        except:
+            continue
+
+
+def remove_old_drdi_data(new_datasets, licenced):
+    for dataset in new_datasets:
+        try:
+            for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced):
+                if d != dataset:
+                    d.delete()
+        except:
+            continue
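The four `remove_old_*_data` helpers added above differ only in the model they query. A hypothetical shared implementation, for illustration only and not part of this patch (`model` would be one of the four dataset models):

```python
def remove_old_data(model, new_datasets, licenced):
    """Delete stale datasets: every row with the same name and licence
    tier as a freshly imported dataset, except the fresh one itself."""
    for dataset in new_datasets:
        try:
            for d in model.objects.filter(name=dataset.name, licenced=licenced):
                if d != dataset:
                    d.delete()
        except AttributeError:
            # Skip list entries that are not dataset objects.
            continue

# Usage, e.g.: remove_old_data(models.PDIDataset, new_datasets, licenced)
```

Narrowing the bare `except:` to `AttributeError` would also keep database errors from being swallowed silently.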
diff --git a/drugstone/settings/settings.py b/drugstone/settings/settings.py
index 7752e44db1d1f659bb6ee91934ba987a9fd6ddfc..e274b939ad22c3d39a64578ad62306b5f8d7d686 100755
--- a/drugstone/settings/settings.py
+++ b/drugstone/settings/settings.py
@@ -169,3 +169,6 @@ CACHES = {
 CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL')
 # timezones: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
 CELERY_TIMEZONE = 'Europe/Berlin'
+
+
+DEFAULTS = {'ppi': 'NeDRex', 'pdi': 'NeDRex', 'pdis': 'NeDRex', 'drdi': 'NeDRex'}
diff --git a/drugstone/urls.py b/drugstone/urls.py
index a44eeb9f83a2fb4f9f8681e8eeee83c54c9429ce..74605cf9806f7a1406d1c316be5bdfc6452523ae 100755
--- a/drugstone/urls.py
+++ b/drugstone/urls.py
@@ -29,7 +29,7 @@ urlpatterns = [
     path('tasks/', tasks_view),
     path('task_result/', result_view),
     path('graph_export/', graph_export),
-    path('query_proteins/', query_proteins),
+    # path('query_proteins/', query_proteins),
     path('query_tissue_proteins/', query_tissue_proteins),
     path('adjacent_drugs/', adjacent_drugs),
     path('adjacent_disorders/', adjacent_disorders),
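`DEFAULTS` replaces the dataset names that were previously hard-coded per view ('STRING', 'DrugBank', 'DisGeNET') with a single NeDRex-backed lookup table; the views in the next diff read it as sketched below. The `get_*_ds` helpers introduced there then resolve the name to a concrete dataset row, deliberately touching `ds.id` so that a missing dataset raises and triggers the fallback to the unlicenced tier.

```python
from drugstone.settings import DEFAULTS

parameters = {}  # stands in for the request payload parsed in TaskView

# Resolve the requested PPI dataset name, falling back to the configured
# default ('NeDRex') when the client omits it.
dataset_name = parameters.get('ppi_dataset', DEFAULTS['ppi'])
```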
diff --git a/drugstone/views.py b/drugstone/views.py
index 5ccbc9b7f38b276a4250c402e7c117b5c436284c..19cde80ae401e70c81927490550cc0641c971c13 100755
--- a/drugstone/views.py
+++ b/drugstone/views.py
@@ -16,116 +16,55 @@ from rest_framework.response import Response
 from rest_framework.views import APIView
 
 from drugstone.util.query_db import query_proteins_by_identifier
 from drugstone import models
-from drugstone import serializers
-
-from drugstone.models import Protein, Task, ProteinDrugInteraction, \
-    Drug, Tissue, ExpressionLevel, Network, ProteinDisorderAssociation, DrugDisorderIndication, Disorder, DrDiDataset, \
-    PDIDataset, PDisDataset, PPIDataset
-from drugstone.serializers import ProteinSerializer, TaskSerializer, \
-    ProteinDrugInteractionSerializer, DrugSerializer, TaskStatusSerializer, TissueSerializer, NetworkSerializer, \
-    ProteinDisorderAssociationSerializer, DisorderSerializer, DrugDisorderIndicationSerializer
+from drugstone.models import *
+from drugstone.serializers import *
 from drugstone.backend_tasks import start_task, refresh_from_redis, task_stats, task_result, task_parameters
-
-# we might want to replace this class with some ProteinProteinInteraction view of user input proteins
-
-# class ProteinViralInteractionView(APIView):
-#     """
-#     Protein-Virus-Interaction Network
-#     """
-#
-#     def get(self, request):
-#         if not request.query_params.get('data'):
-#             proteins = Protein.objects.all()
-#             effects = ViralProtein.objects.all()
-#             edges = ProteinViralInteraction.objects.all()
-#
-#             network = {
-#                 'proteins': ProteinSerializer(many=True).to_representation(proteins),
-#                 'effects': ViralProteinSerializer(many=True).to_representation(effects),
-#                 'edges': ProteinViralInteractionSerializer(many=True).to_representation(edges),
-#             }
-#             return Response(network)
-#
-#         dataset_virus_list = json.loads(request.query_params.get('data', '[]'))
-#         effects = []
-#         for dataset_name, virus_name in dataset_virus_list:
-#             dataset_virus_object = DatasetVirus.objects.get(dataset=dataset_name, virus=virus_name)
-#             effects.extend(list(ViralProtein.objects.filter(dataset_virus=dataset_virus_object).all()))
-#
-#         edges = []
-#         proteins = []
-#         for effect in effects:
-#             edge_objects = ProteinViralInteraction.objects.filter(effect=effect)
-#             for edge_object in edge_objects:
-#                 edges.append(edge_object)
-#
-#                 if edge_object.protein not in proteins:
-#                     proteins.append(edge_object.protein)
-#
-#         network = {
-#             'proteins': ProteinSerializer(many=True).to_representation(proteins),
-#             'effects': ViralProteinSerializer(many=True).to_representation(effects),
-#             'edges': ProteinViralInteractionSerializer(many=True).to_representation(edges),
-#         }
-#         return Response(network)
-
-
-# class ProteinDrugInteractionView(APIView):
-#     """
-#     Protein-Drug-Interaction Network
-#     """
-#
-#     def get(self, request) -> Response:
-#         if request.query_params.get('proteins'):
-#             print("getting drugs for proteins")
-#             protein_ac_list = json.loads(request.query_params.get('proteins'))
-#             proteins = list(Protein.objects.filter(uniprot_code__in=protein_ac_list).all())
-#         else:
-#             proteins = []
-#             task = Task.objects.get(token=request.query_params['token'])
-#             result = task_result(task)
-#             network = result['network']
-#             node_attributes = result.get('node_attributes')
-#             if not node_attributes:
-#                 node_attributes = {}
-#             node_types = node_attributes.get('node_types')
-#             if not node_types:
-#                 node_types = {}
-#             nodes = network['nodes']
-#             for node in nodes:
-#                 node_type = node_types.get(node)
-#                 details = None
-#                 # if not node_type:
-#                 #     print('we should not see this 1')
-#                 #     node_type, details = infer_node_type_and_details(node)
-#                 if node_type == 'protein':
-#                     if details:
-#                         proteins.append(details)
-#                     else:
-#                         try:
-#                             proteins.append(Protein.objects.get(uniprot_code=node))
-#                         except Protein.DoesNotExist:
-#                             pass
-#
-#         pd_interactions = []
-#         drugs = []
-#
-#         for protein in proteins:
-#             pdi_object_list = ProteinDrugInteraction.objects.filter(protein=protein)
-#             for pdi_object in pdi_object_list:
-#                 pd_interactions.append(pdi_object)
-#                 drug = pdi_object.drug
-#                 if drug not in drugs:
-#                     drugs.append(drug)
-#
-#         protein_drug_edges = {
-#             'proteins': ProteinSerializer(many=True).to_representation(proteins),
-#             'drugs': DrugSerializer(many=True).to_representation(drugs),
-#             'edges': ProteinDrugInteractionSerializer(many=True).to_representation(pd_interactions),
-#         }
-#         return Response(protein_drug_edges)
+from drugstone.settings import DEFAULTS
+
+
+def get_ppi_ds(source, licenced):
+    try:
+        ds = models.PPIDataset.objects.filter(name__iexact=source, licenced=licenced).last()
+        ds.id
+        return ds
+    except:
+        if licenced:
+            return get_ppi_ds(source, False)
+        return None
+
+
+def get_pdi_ds(source, licenced):
+    try:
+        ds = models.PDIDataset.objects.filter(name__iexact=source, licenced=licenced).last()
+        ds.id
+        return ds
+    except:
+        if licenced:
+            return get_pdi_ds(source, False)
+        return None
+
+
+def get_pdis_ds(source, licenced):
+    try:
+        ds = models.PDisDataset.objects.filter(name__iexact=source, licenced=licenced).last()
+        ds.id
+        return ds
+    except:
+        if licenced:
+            return get_pdis_ds(source, False)
+        return None
+
+
+def get_drdis_ds(source, licenced):
+    try:
+        ds = models.DrDiDataset.objects.filter(name__iexact=source, licenced=licenced).last()
+        ds.id
+        return ds
+    except:
+        if licenced:
+            return get_drdis_ds(source, False)
+        return None
 
 
 class TaskView(APIView):
@@ -134,12 +73,16 @@ class TaskView(APIView):
         chars = string.ascii_lowercase + string.ascii_uppercase + string.digits
         token_str = ''.join(random.choice(chars) for _ in range(32))
         parameters = request.data['parameters']
+        licenced = parameters.get('licenced', False)
 
         # find databases based on parameter strings
-        parameters['ppi_dataset'] = serializers.PPIDatasetSerializer().to_representation(
-            models.PPIDataset.objects.filter(name__iexact=parameters.get('ppi_dataset', 'STRING')).last())
-
-        parameters['pdi_dataset'] = serializers.PDIDatasetSerializer().to_representation(
-            models.PDIDataset.objects.filter(name__iexact=parameters.get('pdi_dataset', 'DrugBank')).last())
+        parameters['ppi_dataset'] = PPIDatasetSerializer().to_representation(
+            get_ppi_ds(parameters.get('ppi_dataset', DEFAULTS['ppi']), licenced))
+
+        parameters['pdi_dataset'] = PDIDatasetSerializer().to_representation(
+            get_pdi_ds(parameters.get('pdi_dataset', DEFAULTS['pdi']), licenced))
 
         task = Task.objects.create(token=token_str,
                                    target=request.data['target'],
@@ -166,13 +109,13 @@ class TaskView(APIView):
             'stats': task_stats(task),
         })
 
+
 @api_view(['GET'])
 def get_license(request) -> Response:
     from drugstone.management.includes.DatasetLoader import import_license
     return Response({'license': import_license()})
 
-
 @api_view(['POST'])
 def fetch_edges(request) -> Response:
     """Retrieves interactions between nodes given as a list of drugstone IDs.
@@ -184,13 +127,14 @@ def fetch_edges(request) -> Response:
     Returns:
         Response: List of edges which are objects with 'from' and 'to' attributes.
     """
-    dataset = request.data.get('dataset', 'STRING')
+    dataset = request.data.get('dataset', DEFAULTS['ppi'])
     drugstone_ids = [node['drugstone_id'][1:] for node in request.data.get('nodes', '[]') if 'drugstone_id' in node]
-    dataset_object = models.PPIDataset.objects.filter(name__iexact=dataset).last()
+    licenced = request.data.get('licenced', False)
+    dataset_object = get_ppi_ds(dataset, licenced)
     interaction_objects = models.ProteinProteinInteraction.objects.filter(
         Q(ppi_dataset=dataset_object) & Q(from_protein__in=drugstone_ids) & Q(to_protein__in=drugstone_ids))
-    return Response(serializers.ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))
+    return Response(ProteinProteinInteractionSerializer(many=True).to_representation(interaction_objects))
@@ -257,25 +201,6 @@ def tasks_view(request) -> Response:
     return Response(tasks_info)
 
 
-# def infer_node_type_and_details(node) -> Tuple[str, Protein or Drug]:
-#     node_type_indicator = node[0]
-#     if node_type_indicator == 'p':
-#         node_id = int(node[1:])
-#         # protein
-#         prot = Protein.objects.get(id=node_id)
-#         return 'protein', prot
-#     elif node_type_indicator == 'd':
-#         node_id = int(node[2:])
-#         # drug
-#         if node_id[0] == 'r':
-#             drug = Drug.objects.get(id=node_id[1:])
-#             return 'drug', drug
-#         elif node_id[0] == 'i':
-#             disorder = Disorder.objects.get(id=node_id[1:])
-#             return 'disorder', disorder
-#     return None, None
-
-
 @api_view(['POST'])
 def create_network(request) -> Response:
     if 'network' not in request.data:
@@ -417,8 +342,9 @@ def result_view(request) -> Response:
         if 'autofill_edges' in parameters['config'] and parameters['config']['autofill_edges']:
             proteins = set(map(lambda n: n[node_name_attribute][1:],
                                filter(lambda n: node_name_attribute in n, parameters['input_network']['nodes'])))
-            dataset = 'STRING' if 'interaction_protein_protein' not in parameters['config'] else parameters['config'][
-                'interaction_protein_protein']
+            dataset = DEFAULTS['ppi'] if 'interaction_protein_protein' not in parameters['config'] else \
+                parameters['config']['interaction_protein_protein']
             dataset_object = models.PPIDataset.objects.filter(name__iexact=dataset).last()
             interaction_objects = models.ProteinProteinInteraction.objects.filter(
                 Q(ppi_dataset=dataset_object) & Q(from_protein__in=proteins) & Q(to_protein__in=proteins))
@@ -545,20 +471,20 @@ def adjacent_disorders(request) -> Response:
     data = request.data
     if 'proteins' in data:
         drugstone_ids = data.get('proteins', [])
-        pdi_dataset = PDisDataset.objects.filter(name__iexact=data.get('dataset', 'DisGeNET')).last()
+        pdis_dataset = get_pdis_ds(data.get('dataset', DEFAULTS['pdis']), data.get('licenced', False))
         # find adjacent disorders by looking at protein-disorder edges
         pdis_objects = ProteinDisorderAssociation.objects.filter(protein__id__in=drugstone_ids,
-                                                                 pdis_dataset=pdi_dataset)
+                                                                 pdis_dataset_id=pdis_dataset.id)
         disorders = {e.disorder for e in pdis_objects}
         # serialize
         edges = ProteinDisorderAssociationSerializer(many=True).to_representation(pdis_objects)
         disorders = DisorderSerializer(many=True).to_representation(disorders)
     elif 'drugs' in data:
         drugstone_ids = data.get('drugs', [])
-        drdi_dataset = DrDiDataset.objects.filter(name__iexact=data.get('dataset', 'DrugBank')).last()
+        drdi_dataset = get_drdis_ds(data.get('dataset', DEFAULTS['drdi']), data.get('licenced', False))
         # find adjacent disorders by looking at drug-disorder edges
         drdi_objects = DrugDisorderIndication.objects.filter(drug__id__in=drugstone_ids,
-                                                             drdi_dataset=drdi_dataset)
+                                                             drdi_dataset_id=drdi_dataset.id)
         disorders = {e.disorder for e in drdi_objects}
         # serialize
         edges = DrugDisorderIndicationSerializer(many=True).to_representation(drdi_objects)
@@ -581,9 +507,9 @@ def adjacent_drugs(request) -> Response:
     """
     data = request.data
     drugstone_ids = data.get('proteins', [])
-    pdi_dataset = PDIDataset.objects.filter(name__iexact=data.get('pdi_dataset', 'NeDRex')).last()
+    pdi_dataset = get_pdi_ds(data.get('pdi_dataset', DEFAULTS['pdi']), data.get('licenced', False))
     # find adjacent drugs by looking at drug-protein edges
-    pdi_objects = ProteinDrugInteraction.objects.filter(protein__id__in=drugstone_ids, pdi_dataset=pdi_dataset)
+    pdi_objects = ProteinDrugInteraction.objects.filter(protein__id__in=drugstone_ids, pdi_dataset_id=pdi_dataset.id)
     drugs = {e.drug for e in pdi_objects}
     # serialize
     pdis = ProteinDrugInteractionSerializer(many=True).to_representation(pdi_objects)
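With `licenced` threaded through the endpoints above, clients opt into licenced data per request and fall back to the unlicenced tier when it is unavailable. A hypothetical call against a local instance (endpoint path from urls.py; host, port and IDs are made up, and the response shape is whatever the view above serializes):

```python
import requests

payload = {
    "proteins": [1, 2, 3],    # drugstone protein IDs (made-up values)
    "pdi_dataset": "NeDRex",  # optional; defaults to DEFAULTS['pdi']
    "licenced": True,         # optional; defaults to False
}
response = requests.post("http://localhost:8000/adjacent_drugs/", json=payload)
print(response.json())
```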
diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
index 43bf32df3da5898d25d96f8abd734edf4872ea3c..db7a2d39ad5d4040b6dd34294c675ea20ae7b9bf 100755
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone
 python3 manage.py migrate
 python3 manage.py createfixtures
 python3 manage.py cleanuptasks
-python3 manage.py populate_db --update -a
-python3 manage.py make_graphs
+#python3 manage.py populate_db --update -a
+#python3 manage.py make_graphs
 
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
diff --git a/tasks/betweenness_centrality.py b/tasks/betweenness_centrality.py
index 65f654cda611da813a5fc15affe90e1499506ae4..d839c5ebfee95f844a00c1a549b054548b39d1f9 100755
--- a/tasks/betweenness_centrality.py
+++ b/tasks/betweenness_centrality.py
@@ -75,7 +75,7 @@ def betweenness_centrality(task_hook: TaskHook):
       Network-Based Prioritization of Candidate Disease Genes or Other Molecules, Bioinformatics 29(11),
       2013, pp. 1471-1473, https://doi.org/10.1093/bioinformatics/btt164.
     """
-    
+
     # Type: list of str
     # Semantics: Names of the seed proteins. Use UNIPROT IDs for host proteins, and
     #            names of the form SARS_CoV2_<IDENTIFIER> (e.g., SARS_CoV2_ORF6) for
@@ -84,7 +84,7 @@ def betweenness_centrality(task_hook: TaskHook):
     #            utility in frontend.
     # Acceptable values: UNIPROT IDs, identifiers of viral proteins.
     seeds = task_hook.parameters["seeds"]
-    
+
     # Type: str.
     # Semantics: The virus strain for which the analysis should be run, or the
     #            string literal "drugs" (if used for ranking drugs).
@@ -95,7 +95,6 @@ def betweenness_centrality(task_hook: TaskHook):
     # Acceptable values: "PPI", "PPDr"
     # target_or_drugs = task_hook.parameters.get("target_or_drugs", "PPI")
-
     # Type: list of str.
     # Semantics: The datasets which should be considered for the analysis.
     # Example: ["Krogan", "TUM"].
@@ -104,7 +103,7 @@ def betweenness_centrality(task_hook: TaskHook):
     # Reasonable default: [].
     # Acceptable values: "Krogan", "TUM".
     # datasets = task_hook.parameters.get("datasets", [])
-    
+
     # Type: list of str.
     # Semantics: Virus-host edge types which should be ignored for the analysis.
     # Example: ["Overexpression"].
@@ -113,14 +112,14 @@ def betweenness_centrality(task_hook: TaskHook):
     # Reasonable default: [].
     # Acceptable values: "AP-MS", "overexpression".
     # ignored_edge_types = task_hook.parameters.get("ignored_edge_types", [])
-    
+
     # Type: bool
     # Semantics: Specifies whether also drugs targeting interactors of the seeds should be considered.
     # Example: False.
     # Reasonable default: False.
     # Has no effect unless trust_rank.py is used for ranking drugs.
     include_indirect_drugs = task_hook.parameters.get("include_indirect_drugs", False)
-    
+
     # Type: bool
     # Semantics: Specifies whether non-approved drugs should be included in the analysis when ranking drugs.
     # Example: False.
@@ -134,7 +133,7 @@ def betweenness_centrality(task_hook: TaskHook):
     # Reasonable default: False.
     # Has no effect when the algorithm is used for ranking drugs.
     # ignore_non_seed_baits = task_hook.parameters.get("ignore_non_seed_baits", False)
-    
+
     # Type: int.
     # Semantics: Number of returned proteins.
     # Example: 20.
@@ -155,7 +154,7 @@ def betweenness_centrality(task_hook: TaskHook):
     # Reasonable default: 0.
     # Acceptable values: Floats between 0 and 1.
     hub_penalty = task_hook.parameters.get("hub_penalty", 0.0)
-    
+
     # Type: int.
     # Semantics: Number of threads used for running the analysis.
     # Example: 1.
@@ -170,24 +169,27 @@ def betweenness_centrality(task_hook: TaskHook):
     search_target = task_hook.parameters.get("target", "drug-target")
 
     filterPaths = task_hook.parameters.get("filter_paths", True)
-    
+
     # Parsing input file.
     task_hook.set_progress(0 / 3.0, "Parsing input.")
-    file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
+    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
+    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
+        filename += "_licenced"
+    filename = os.path.join(task_hook.data_directory, filename + ".gt")
     g, seed_ids, drug_ids = read_graph_tool_graph(
-        file_path,
+        filename,
         seeds,
         max_deg,
         include_indirect_drugs,
         include_non_approved_drugs,
         target=search_target
     )
     weights = edge_weights(g, hub_penalty)
 
     # Set number of threads if OpenMP support is enabled.
     if gt.openmp_enabled():
         gt.openmp_set_num_threads(num_threads)
-    
+
     # Call graph-tool to compute betweenness centrality.
     task_hook.set_progress(1 / 3.0, "Computing betweenness centralities.")
     scores = g.new_vertex_property("float")
@@ -201,20 +203,19 @@ def betweenness_centrality(task_hook: TaskHook):
         if num_paths > 0:
             local_scores.a /= num_paths
         scores.a += local_scores.a
-    
+
     # Compute and return the results.
     task_hook.set_progress(2 / 3.0, "Formatting results.")
     task_hook.set_results(
         scores_to_results(
             search_target,
             result_size,
             g,
             seed_ids,
             drug_ids,
             scores,
             ppi_dataset,
             pdi_dataset,
             filterPaths
         )
     )
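The filename logic introduced here is duplicated verbatim in closeness_centrality.py, degree_centrality.py, multi_steiner.py, network_proximity.py and trust_rank.py below. A sketch of how it could be factored out (hypothetical helper, not part of this patch):

```python
import os

def internal_graph_path(data_directory, ppi_dataset, pdi_dataset):
    # Licenced graphs live in separate .gt files with a '_licenced' suffix,
    # keyed by the PPI/PDI dataset combination.
    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
        filename += "_licenced"
    return os.path.join(data_directory, filename + ".gt")
```

Each task would then call `internal_graph_path(task_hook.data_directory, ppi_dataset, pdi_dataset)` instead of rebuilding the string.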
diff --git a/tasks/closeness_centrality.py b/tasks/closeness_centrality.py
index 1767df320221f3b650fdb2754c689f32b31cc86b..a90014f963f8381dbe369f4ff943ff6b5922c9bc 100755
--- a/tasks/closeness_centrality.py
+++ b/tasks/closeness_centrality.py
@@ -170,9 +170,12 @@ def closeness_centrality(task_hook: TaskHook):
     # Parsing input file.
     task_hook.set_progress(0 / 4.0, "Parsing input.")
-    file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
+    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
+    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
+        filename += "_licenced"
+    filename = os.path.join(task_hook.data_directory, filename + ".gt")
     # g, seed_ids, viral_protein_ids, drug_ids = read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits, include_indirect_drugs, include_non_approved_drugs)
-    g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
+    g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
 
     task_hook.set_progress(1 / 4.0, "Computing edge weights.")
     weights = edge_weights(g, hub_penalty)
diff --git a/tasks/degree_centrality.py b/tasks/degree_centrality.py
index 112d9abc5ccee06510d4b65a9f550028a6218b82..bceffec86520757d2bb81aad7e150484c546e1eb 100755
--- a/tasks/degree_centrality.py
+++ b/tasks/degree_centrality.py
@@ -150,9 +150,12 @@ def degree_centrality(task_hook: TaskHook):
     # Parsing input file.
     task_hook.set_progress(0 / 3.0, "Parsing input.")
-    file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
+    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
+    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
+        filename += "_licenced"
+    filename = os.path.join(task_hook.data_directory, filename + ".gt")
     # g, seed_ids, viral_protein_ids, drug_ids = read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits, False, include_non_approved_drugs)
-    g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, False, include_non_approved_drugs, search_target)
+    g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, max_deg, False, include_non_approved_drugs, search_target)
 
     # Set number of threads if OpenMP support is enabled.
     if gt.openmp_enabled():
diff --git a/tasks/multi_steiner.py b/tasks/multi_steiner.py
index fa0912e89ca810eab55b9308ec4b749ab336a009..f8e9a80e7f69160702d24d403ffc88fa5406e77c 100755
--- a/tasks/multi_steiner.py
+++ b/tasks/multi_steiner.py
@@ -105,9 +105,11 @@ def multi_steiner(task_hook: TaskHook):
     # Parsing input file.
     task_hook.set_progress(0 / (float(num_trees + 3)), "Parsing input.")
-    file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
-    # g, seed_ids, _, _ = read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits)
-    g, seed_ids, _ = read_graph_tool_graph(file_path, seeds, max_deg, target=search_target)
+    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
+    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
+        filename += "_licenced"
+    filename = os.path.join(task_hook.data_directory, filename + ".gt")
+    g, seed_ids, _ = read_graph_tool_graph(filename, seeds, max_deg, target=search_target)
     # seed_map = {g.vertex_properties["name"][node]: node for node in seed_ids}
     seed_map = {g.vertex_properties[node_name_attribute][node]: node for node in seed_ids}
     task_hook.set_progress(1 / (float(num_trees + 3)), "Computing edge weights.")
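For reference, the `.gt` files resolved above are binary graph-tool graphs produced by `make_graphs`; loading one and rebuilding multi_steiner's seed lookup outside a task looks roughly like this (the file name and property key are assumptions, the tasks receive the key as `node_name_attribute`):

```python
from graph_tool.all import load_graph

node_name_attribute = "drugstone_id"  # assumed property key
seeds = {"p1", "p2"}                  # assumed drugstone IDs of the seeds

g = load_graph("internal_NeDRex_NeDRex_licenced.gt")  # path as built above
names = g.vertex_properties[node_name_attribute]
seed_map = {names[v]: v for v in g.vertices() if names[v] in seeds}
```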
diff --git a/tasks/network_proximity.py b/tasks/network_proximity.py
index 55d86513d1cec0cca5f55e67b7ec9cc624d524a2..90b7708d9b21df88a766dfccee1758d34fcf7bb3 100755
--- a/tasks/network_proximity.py
+++ b/tasks/network_proximity.py
@@ -86,9 +86,12 @@ def network_proximity(task_hook: TaskHook):
     # Parsing input file.
     task_hook.set_progress(0.0 / 8, "Parsing input.")
-    file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
+    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
+    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
+        filename += "_licenced"
+    filename = os.path.join(task_hook.data_directory, filename + ".gt")
     # g, seed_ids, _, drug_ids = read_graph_tool_graph(file_path, seeds, "", "", max_deg, False, True, include_non_approved_drugs)
-    g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, True, include_non_approved_drugs, target=search_target)
+    g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, max_deg, True, include_non_approved_drugs, target=search_target)
 
     # Computing edge weights.
     task_hook.set_progress(1.0 / 8, "Computing edge weights.")
     weights = edge_weights(g, hub_penalty)
diff --git a/tasks/trust_rank.py b/tasks/trust_rank.py
index a01fbe412b3266863b8d1df0f9bd3cced74481e2..ceda67cffe62902865f5f10f84823765389fb41b 100755
--- a/tasks/trust_rank.py
+++ b/tasks/trust_rank.py
@@ -198,8 +198,11 @@ def trust_rank(task_hook: TaskHook):
     # Parsing input file.
     task_hook.set_progress(0 / 4.0, "Parsing input.")
-    file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
-    g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
+    filename = f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}"
+    if ppi_dataset['licenced'] or pdi_dataset['licenced']:
+        filename += "_licenced"
+    filename = os.path.join(task_hook.data_directory, filename + ".gt")
+    g, seed_ids, drug_ids = read_graph_tool_graph(filename, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
 
     task_hook.set_progress(1 / 4.0, "Computing edge weights.")
     weights = edge_weights(g, hub_penalty, inverse=True)
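The GT_THREADS bump at the top of this patch only pays off where the tasks enable OpenMP; every task module guards thread configuration the same way. A sketch of the pattern, assuming GT_THREADS from docker-django.env.dev is what ultimately feeds `num_threads`:

```python
import os
import graph_tool.all as gt

# num_threads comes from task_hook.parameters in the task modules; the
# GT_THREADS environment variable is assumed to be its upstream source.
num_threads = int(os.environ.get("GT_THREADS", "1"))
if gt.openmp_enabled():
    gt.openmp_set_num_threads(num_threads)
```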