diff --git a/.dockerignore b/.dockerignore index 6b8710a711f3b689885aa5c26c6c06bde348e82b..c7eda2bee653e65357adbfa8b640920e027c8640 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,3 @@ .git +data +venv diff --git a/.gitignore b/.gitignore index 61228f1f1878cc5e7103c4c491bf97302e8b513c..eba7b164c2dfcb3290ff160e2ad49bead31fc2d2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,10 @@ venv/ *.DS_Store db.sqlite3 supervisord.log -supervisord.log supervisord.pid docker-entrypoint.lock celerybeat-schedule.bak celerybeat-schedule.dat celerybeat-schedule.dir docker-django.env.prodnetworks.zip +data/Networks/ diff --git a/README.md b/README.md index 3a8396b51f8b9e8fd9ad22a7c80c597f1180997a..b72c7768c8c22e5e5815273162a16d8aa4bd8bcd 100755 --- a/README.md +++ b/README.md @@ -26,3 +26,7 @@ python3 manage.py make_graphs ### Docker DEV environment (building is optional) ``docker-compose -f docker-compose.yml up -d --build`` + +### Data folder +Static datasets are mounted from a directory now, instead of fusing them into the image. Download them from the following link and put them into the data folder that is mounted by the docker-compose.yml: +https://wolken.zbh.uni-hamburg.de/index.php/s/gywnL3HP26CWrgA diff --git a/docker-compose.yml b/docker-compose.yml index 0b285adb1c0c876bdcc3a06ce301d9c452f25960..abf89f1adbd8fb2fc40467a5a4b1ebd71f744e09 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: restart: always volumes: - drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations - - drugstone_data_volume:/usr/src/drugstone/data + - ./data:/usr/src/drugstone/data ports: - 8001:8000 networks: @@ -60,7 +60,7 @@ services: container_name: drugstone_celery hostname: drugstone_celery volumes: - - drugstone_data_volume:/usr/src/drugstone/data + - ./data:/usr/src/drugstone/data env_file: - './docker-django.env.dev' depends_on: diff --git a/drugstone/management/commands/import_from_nedrex.py b/drugstone/management/commands/import_from_nedrex.py index bc09491bb6f232a8f6bce5dbf71ef5866b9932a4..dd8a434264392a77306afa48bb1b282af9cea010 100644 --- a/drugstone/management/commands/import_from_nedrex.py +++ b/drugstone/management/commands/import_from_nedrex.py @@ -256,8 +256,8 @@ class NedrexImporter: iter_edge_collection('drug_has_target', add_dpi) models.ProteinDrugInteraction.objects.bulk_create(bulk) - new_datasets = [dataset].extend(source_datasets.values()) - DatasetLoader.remove_old_pdi_data(new_datasets, licenced) + # new_datasets = [dataset].extend(source_datasets.values()) + # DatasetLoader.remove_old_pdi_data(new_datasets, licenced) return len(bulk) def import_protein_protein_interactions(self, dataset: PPIDataset, update): @@ -316,8 +316,8 @@ class NedrexImporter: iter_ppi(add_ppi) models.ProteinProteinInteraction.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] - DatasetLoader.remove_old_ppi_data(new_datasets, licenced) + # new_datasets = [dataset, source_datasets.values()] + # DatasetLoader.remove_old_ppi_data(new_datasets, licenced) return len(bulk) def import_protein_disorder_associations(self, dataset, update): @@ -367,8 +367,8 @@ class NedrexImporter: iter_edge_collection('gene_associated_with_disorder', add_pdis) models.ProteinDisorderAssociation.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] - DatasetLoader.remove_old_pdis_data(new_datasets, licenced) + # new_datasets = [dataset, source_datasets.values()] + # DatasetLoader.remove_old_pdis_data(new_datasets, licenced) return len(bulk) def import_drug_disorder_indications(self, dataset, update): @@ -416,6 +416,6 @@ class NedrexImporter: iter_edge_collection('drug_has_indication', add_drdis) models.DrugDisorderIndication.objects.bulk_create(bulk) - new_datasets = [dataset, source_datasets.values()] - DatasetLoader.remove_old_drdi_data(new_datasets, licenced) + # new_datasets = [dataset, source_datasets.values()] + # DatasetLoader.remove_old_drdi_data(new_datasets, licenced) return len(bulk) diff --git a/drugstone/management/commands/populate_db.py b/drugstone/management/commands/populate_db.py index 4df7c6a14b3b63fbe61436033eeb4c7c97402b89..5bc37ed7e45baa944bfe1542aa0f9040c3fd4915 100755 --- a/drugstone/management/commands/populate_db.py +++ b/drugstone/management/commands/populate_db.py @@ -10,8 +10,8 @@ from drugstone.management.includes.DataPopulator import DataPopulator from .import_from_nedrex import NedrexImporter from drugstone.management.includes.NodeCache import NodeCache from drugstone.management.includes import DatasetLoader -from ..includes.DatasetLoader import remove_old_pdi_data, remove_old_ppi_data, remove_old_pdis_data, \ - remove_old_drdi_data +# from ..includes.DatasetLoader import remove_old_pdi_data, remove_old_ppi_data, remove_old_pdis_data, \ +# remove_old_drdi_data class DatabasePopulator: diff --git a/drugstone/management/includes/DataLoader.py b/drugstone/management/includes/DataLoader.py index af969892fbe1f84ecdae3ab3eb36cf25e8a569dc..958d45a2fcaec6dbeb8310b89647c990d904c3d4 100755 --- a/drugstone/management/includes/DataLoader.py +++ b/drugstone/management/includes/DataLoader.py @@ -226,7 +226,7 @@ class DataLoader: # @staticmethod # def load_pdis_disgenet() -> pd.DataFrame: - # """Loads the DisGeNET PDis associations with UniprotAC Numbers and Mondo IDs + # """Loads the DisGeNET PDis associations with Uniprot Numbers and Mondo IDs # # Returns: # pd.DataFrame: columns "protein_name", "disorder_name" and "score" diff --git a/drugstone/management/includes/DatasetLoader.py b/drugstone/management/includes/DatasetLoader.py index 859fc1179fcd55d0c6e68675aad9a851d69ee31d..062b7d99dc62655b7f7ebc7cd8e4f8435fd6abe5 100644 --- a/drugstone/management/includes/DatasetLoader.py +++ b/drugstone/management/includes/DatasetLoader.py @@ -300,61 +300,69 @@ def is_licenced_drdi_source(source): return False -def remove_old_pdi_data(new_datasets, licenced): - for dataset in new_datasets: - print("Deleting all except "+str(dataset)) - try: - for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: "+str(d)) - if d != dataset: - print("Deleting: "+str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue - - -def remove_old_ppi_data(new_datasets, licenced): - for dataset in new_datasets: - print("Deleting all except " + str(dataset)) - try: - for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: " + str(d)) - if d != dataset: - print("Deleting: " + str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue - - -def remove_old_pdis_data(new_datasets, licenced): - for dataset in new_datasets: - print("Deleting all except " + str(dataset)) - try: - for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: " + str(d)) - if d != dataset: - print("Deleting: " + str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue - - -def remove_old_drdi_data(new_datasets, licenced): - for dataset in new_datasets: - print("Deleting all except " + str(dataset)) - try: - for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced): - print("Testing: " + str(d)) - if d != dataset: - print("Deleting: " + str(d)) - d.delete() - except Exception as e: - print("Error when trying to delete old datasets") - print(e) - continue +# def remove_old_pdi_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except "+str(dataset)) +# try: +# for d in models.PDIDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: "+str(d)) +# if d != dataset: +# print("Deleting: "+str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue + + +# def remove_old_ppi_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except " + str(dataset)) +# try: +# for d in models.PPIDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: " + str(d)) +# if d != dataset: +# print("Deleting: " + str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue + + +# def remove_old_pdis_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except " + str(dataset)) +# try: +# for d in models.PDisDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: " + str(d)) +# if d != dataset: +# print("Deleting: " + str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue + + +# def remove_old_drdi_data(new_datasets, licenced): +# if new_datasets is None: +# return +# for dataset in new_datasets: +# print("Deleting all except " + str(dataset)) +# try: +# for d in models.DrDiDataset.objects.filter(name=dataset.name, licenced=licenced): +# print("Testing: " + str(d)) +# if d != dataset: +# print("Deleting: " + str(d)) +# d.delete() +# except Exception as e: +# print("Error when trying to delete old datasets") +# print(e) +# continue diff --git a/drugstone/serializers.py b/drugstone/serializers.py index 7fca1113fde8c48be9bc9d6d06be5b462541f490..3805f42ca025ff7e5b7fa071aeee24dfadb5ad78 100755 --- a/drugstone/serializers.py +++ b/drugstone/serializers.py @@ -30,7 +30,7 @@ class DrDisDatasetSerializer(serializers.ModelSerializer): class ProteinNodeSerializer(serializers.ModelSerializer): drugstone_id = serializers.SerializerMethodField() - uniprot_ac = serializers.SerializerMethodField() + uniprot = serializers.SerializerMethodField() symbol = serializers.SerializerMethodField() ensg = serializers.SerializerMethodField() entrez = serializers.SerializerMethodField() @@ -38,7 +38,7 @@ class ProteinNodeSerializer(serializers.ModelSerializer): def get_drugstone_id(self, obj): return [f'p{obj.id}'] - def get_uniprot_ac(self, obj): + def get_uniprot(self, obj): return [obj.uniprot_code] def get_symbol(self, obj): @@ -61,19 +61,19 @@ class ProteinNodeSerializer(serializers.ModelSerializer): class Meta: model = Protein - fields = ['drugstone_id', 'uniprot_ac', 'symbol', 'protein_name', 'entrez', 'ensg'] + fields = ['drugstone_id', 'uniprot', 'symbol', 'protein_name', 'entrez', 'ensg'] class ProteinSerializer(serializers.ModelSerializer): drugstone_id = serializers.SerializerMethodField() - uniprot_ac = serializers.SerializerMethodField() + uniprot = serializers.SerializerMethodField() symbol = serializers.SerializerMethodField() ensg = serializers.SerializerMethodField() def get_drugstone_id(self, obj): return f'p{obj.id}' - def get_uniprot_ac(self, obj): + def get_uniprot(self, obj): return obj.uniprot_code def get_symbol(self, obj): @@ -93,7 +93,7 @@ class ProteinSerializer(serializers.ModelSerializer): class Meta: model = Protein - fields = ['drugstone_id', 'uniprot_ac', 'symbol', 'protein_name', 'entrez', 'ensg'] + fields = ['drugstone_id', 'uniprot', 'symbol', 'protein_name', 'entrez', 'ensg'] class DrugSerializer(serializers.ModelSerializer): diff --git a/drugstone/util/query_db.py b/drugstone/util/query_db.py index 1a63b069a6c1bb835ad1a64140352f997a5e2cc6..a589cf2b551d85cfbdbbfa8e4ff7a604202785f1 100644 --- a/drugstone/util/query_db.py +++ b/drugstone/util/query_db.py @@ -30,7 +30,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L protein_attribute = 'symbol' q_list = map(lambda n: Q(gene__iexact=n), node_ids) elif identifier == 'uniprot': - protein_attribute = 'uniprot_ac' + protein_attribute = 'uniprot' q_list = map(lambda n: Q(uniprot_code__iexact=n), node_ids) elif identifier == 'ensg' or identifier == 'ensembl': protein_attribute = 'ensg' @@ -66,7 +66,7 @@ def query_proteins_by_identifier(node_ids: Set[str], identifier: str) -> Tuple[L def get_protein_ids(id_space, proteins): if (id_space == 'uniprot'): - return [p['uniprot_ac'] for p in proteins] + return [p['uniprot'] for p in proteins] if (id_space == 'ensg' or id_space == 'ensembl'): return [p['ensg'] for p in proteins] if (id_space == 'symbol'): diff --git a/drugstone/views.py b/drugstone/views.py index 5024babdb97b772f2e90d2b6e3bf2beff13c5a88..45ff92405eaa2e2388aed9e92dfccbfc2d47d34b 100755 --- a/drugstone/views.py +++ b/drugstone/views.py @@ -60,10 +60,6 @@ class TaskView(APIView): parameters = request.data['parameters'] licenced = parameters.get('licenced', False) - print(models.PDIDataset.objects.all()) - - print(get_ppi_ds(parameters.get('ppi_dataset', DEFAULTS['ppi']), licenced)) - print(get_pdi_ds(parameters.get('pdi_dataset', DEFAULTS['pdi']), licenced)) # find databases based on parameter strings parameters['ppi_dataset'] = PPIDatasetSerializer().to_representation( @@ -172,7 +168,6 @@ def map_nodes(request) -> Response: # change data structure to dict in order to be quicker when merging nodes_mapped_dict = {id.upper(): node for node in nodes_mapped for id in node[id_key]} - print(nodes_mapped_dict) # merge fetched data with given data to avoid data loss for node in nodes: @@ -231,13 +226,29 @@ def create_network(request) -> Response: return Response(id) +def latest_datasets(ds): + dataset_dict = {} + for d in ds: + name = d.name + "_" + str(d.licenced) + if name not in dataset_dict: + dataset_dict[name] = d + continue + if dataset_dict[name].version < d.version: + dataset_dict[name] = d + return dataset_dict.values() + + @api_view(['GET']) def get_datasets(request) -> Response: datasets = {} - datasets['protein-protein'] = PPIDatasetSerializer(many=True).to_representation(PPIDataset.objects.all()) - datasets['protein-drug'] = PDIDatasetSerializer(many=True).to_representation(PDIDataset.objects.all()) - datasets['protein-disorder'] = PDisDatasetSerializer(many=True).to_representation(PDisDataset.objects.all()) - datasets['drug-disorder'] = DrDisDatasetSerializer(many=True).to_representation(DrDiDataset.objects.all()) + datasets['protein-protein'] = PPIDatasetSerializer(many=True).to_representation( + latest_datasets(PPIDataset.objects.all())) + datasets['protein-drug'] = PDIDatasetSerializer(many=True).to_representation( + latest_datasets(PDIDataset.objects.all())) + datasets['protein-disorder'] = PDisDatasetSerializer(many=True).to_representation( + latest_datasets(PDisDataset.objects.all())) + datasets['drug-disorder'] = DrDisDatasetSerializer(many=True).to_representation( + latest_datasets(DrDiDataset.objects.all())) return Response(datasets) @@ -275,6 +286,7 @@ def result_view(request) -> Response: if not node_types: node_types = {} node_attributes['node_types'] = node_types + is_seed = node_attributes.get('is_seed') if not is_seed: is_seed = {} @@ -336,7 +348,7 @@ def result_view(request) -> Response: else: continue - nodes_mapped, _ = query_proteins_by_identifier(protein_nodes, identifier) + nodes_mapped, identifier = query_proteins_by_identifier(protein_nodes, identifier) nodes_mapped_dict = {node[identifier][0]: node for node in nodes_mapped} @@ -366,11 +378,10 @@ def result_view(request) -> Response: for node_id, detail in node_details.items(): if 'drugstoneType' in detail and detail['drugstoneType'] == 'protein': - detail['symbol'] = list(set(detail['symbol'])) - detail['entrez'] = list(set(detail['entrez'])) - detail['uniprot_ac'] = list(set(detail['uniprot_ac'])) - if 'ensg' in detail: - detail['ensg'] = list(set(detail['ensg'])) + detail['symbol'] = list(set(detail['symbol'])) if 'symbol' in detail else [] + detail['entrez'] = list(set(detail['entrez'])) if 'entrez' in detail else [] + detail['uniprot'] = list(set(detail['uniprot'])) if 'uniprot' in detail else [] + detail['ensg'] = list(set(detail['ensg'])) if 'ensg' in detail else [] edges = parameters['input_network']['edges'] @@ -420,7 +431,7 @@ def result_view(request) -> Response: for i in proteins: new_i = { 'id': i['id'], - 'uniprot_ac': i['uniprot_ac'] if 'uniprot_ac' in i else [], + 'uniprot': i['uniprot'] if 'uniprot' in i else [], 'gene': i['symbol'] if 'symbol' in i else [], 'name': i['protein_name'] if 'protein_name' in i else [], 'ensembl': i['ensg'] if 'ensg' in i else [], @@ -463,9 +474,11 @@ def graph_export(request) -> Response: Recieve whole graph data and write it to graphml file. Return the file ready to download. """ - remove_node_properties = ['color', 'shape', 'border_width', 'group_name', 'border_width_selected', 'shadow', - 'group_id', 'drugstone_type', 'font', 'x', 'y'] - remove_edge_properties = ['group_name', 'color', 'dashes', 'shadow', 'id'] + remove_node_properties = ['color', 'shape', 'border_width', 'group', 'border_width_selected', 'shadow', + 'group_id', 'drugstone_type', 'font', 'x', 'y', '_group'] + rename_node_properties = {'group_name': 'group'} + remove_edge_properties = ['group', 'color', 'dashes', 'shadow', 'id'] + rename_edge_properties = {'group_name': 'group'} nodes = request.data.get('nodes', []) edges = request.data.get('edges', []) fmt = request.data.get('fmt', 'graphml') @@ -476,6 +489,10 @@ def graph_export(request) -> Response: for prop in remove_node_properties: if prop in node: del node[prop] + for k, v in rename_node_properties.items(): + if k in node: + node[v] = node[k] + del node[k] for key in list(node.keys()): if isinstance(node[key], list) or isinstance(node[key], dict): node[key] = json.dumps(node[key]) @@ -497,6 +514,10 @@ def graph_export(request) -> Response: for prop in remove_edge_properties: if prop in e: del e[prop] + for k, v in rename_edge_properties.items(): + if k in e: + e[v] = e[k] + del e[k] for key in e: if isinstance(e[key], list) or isinstance(e[key], dict): e[key] = json.dumps(e[key]) diff --git a/requirements.txt b/requirements.txt index e2ca8b1400cb459de5d3df84f22a841954ed7b9c..499918e26a46bf66835ed4667503036bfe5c2a35 100755 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,9 @@ celery==5.2.7 certifi==2022.12.7 chardet==3.0.4 click==8.1.3 -cryptography==38.0.3 +cryptography==39.0.1 decorator==4.4.2 -Django==3.2.16 +Django==3.2.17 django-cors-headers==3.4.0 django-redis==4.11.0 django-rq-dashboard==0.3.3 diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh index 13501e0d43ebdc4537a2d7ccddfc9dc857a2ab8e..079cbada214c70e54dc849f59a97778350a4839e 100755 --- a/scripts/docker-entrypoint.sh +++ b/scripts/docker-entrypoint.sh @@ -11,4 +11,5 @@ else python3 manage.py populate_db --update -a python3 manage.py make_graphs fi + /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" diff --git a/tasks/quick_task.py b/tasks/quick_task.py index 899674d8e327360e85b9844439d63300edaf2a0c..a49536e512c7479990cee36221f04dac77fda790 100755 --- a/tasks/quick_task.py +++ b/tasks/quick_task.py @@ -1,7 +1,8 @@ from tasks.task_hook import TaskHook + def quick_task(task_hook: TaskHook): - def run_closeness(parameters, network): + def run_closeness(parameters, network, original_seeds=None): from .closeness_centrality import closeness_centrality def closeness_progress(progress, status): @@ -9,6 +10,8 @@ def quick_task(task_hook: TaskHook): def closeness_set_result(result): result["network"]["edges"].extend(network["edges"]) + if original_seeds is not None: + result['node_attributes']['is_seed'] = original_seeds task_hook.set_results(result) # Prepare intermediate hook @@ -20,7 +23,6 @@ def quick_task(task_hook: TaskHook): # Run closeness centrality closeness_centrality(closeness_task_hook) - def run_multi_steiner(parameters): from .multi_steiner import multi_steiner @@ -35,7 +37,7 @@ def quick_task(task_hook: TaskHook): if len(seeds) == 0: task_hook.set_results({"network": {"nodes": [], "edges": []}}) return - + og_seeds = parameters.get('seeds') parameters.update({ "seeds": seeds, "result_size": 10, @@ -43,7 +45,11 @@ def quick_task(task_hook: TaskHook): "target": "drug", "include_non_approved_drugs": True }) - run_closeness(parameters, result["network"]) + is_seed = result.get('node_attributes') + run_closeness(parameters, result["network"], result['node_attributes']['is_seed']) + # parameters.update({ + # "seeds": og_seeds + # }) parameters["num_trees"] = 1 parameters["hub_penalty"] = 1 diff --git a/tasks/util/scores_to_results.py b/tasks/util/scores_to_results.py index b690db510699139e7c8a368c80103a892018440b..cb474961e14aecd4d4b6fc96f15c633bfac72ee5 100755 --- a/tasks/util/scores_to_results.py +++ b/tasks/util/scores_to_results.py @@ -12,14 +12,14 @@ def scores_to_results( pdi_dataset, filterPaths ): - r"""Transforms the scores to the required result format.""" node_name_attribute = "internal_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute if target == "drug": candidates = [(node, scores[node]) for node in drug_ids if scores[node] > 0] else: - candidates = [(node, scores[node]) for node in range(g.num_vertices()) if scores[node] > 0 and node not in set(seed_ids)] + candidates = [(node, scores[node]) for node in range(g.num_vertices()) if + scores[node] > 0 and node not in set(seed_ids)] best_candidates = [item[0] for item in sorted(candidates, key=lambda item: item[1], reverse=True)[:result_size]] # Concatenate best result candidates with seeds and compute induced subgraph. # since the result size filters out nodes, the result network is not complete anymore. @@ -29,7 +29,7 @@ def scores_to_results( intermediate_nodes = set() returned_edges = set() - returned_nodes = set(seed_ids) # return seed_ids in any case + returned_nodes = set(seed_ids) # return seed_ids in any case # return only the path to a drug with the shortest distance accepted_candidates = set() @@ -44,11 +44,15 @@ def scores_to_results( vertices, edges = gtt.shortest_path(g, candidate, seed_id) drug_in_path = False + seed_in_path = False for vertex in vertices: if g.vertex_properties["type"][int(vertex)] == "drug" and vertex != candidate: drug_in_path = True break - if drug_in_path: + if int(vertex) in seed_ids and int(vertex) != seed_id: + seed_in_path = True + break + if drug_in_path or seed_in_path: continue accepted_candidates.add(g.vertex_properties[node_name_attribute][int(candidate)]) for vertex in vertices: @@ -58,7 +62,8 @@ def scores_to_results( intermediate_nodes.add(g.vertex_properties[node_name_attribute][int(vertex)]) returned_nodes.add(int(vertex)) for edge in edges: - if ((edge.source(), edge.target()) not in returned_edges) or ((edge.target(), edge.source()) not in returned_edges): + if (((edge.source(), edge.target()) not in returned_edges) or ( + (edge.target(), edge.source()) not in returned_edges)) and int(edge.target()) in returned_nodes and int(edge.source()) in returned_nodes: returned_edges.add((edge.source(), edge.target())) else: for candidate in best_candidates: @@ -66,11 +71,15 @@ def scores_to_results( vertices, edges = gtt.shortest_path(g, candidate, seed_id) drug_in_path = False + seed_in_path = False for vertex in vertices: if g.vertex_properties["type"][int(vertex)] == "drug" and vertex != candidate: drug_in_path = True break - if drug_in_path: + if int(vertex) in seed_ids and int(vertex) != seed_id: + seed_in_path = True + break + if drug_in_path or seed_in_path: continue accepted_candidates.add(g.vertex_properties[node_name_attribute][int(candidate)]) for vertex in vertices: @@ -80,18 +89,22 @@ def scores_to_results( intermediate_nodes.add(g.vertex_properties[node_name_attribute][int(vertex)]) returned_nodes.add(int(vertex)) for edge in edges: - if ((edge.source(), edge.target()) not in returned_edges) or ((edge.target(), edge.source()) not in returned_edges): + if (((edge.source(), edge.target()) not in returned_edges) or ( + (edge.target(), edge.source()) not in returned_edges)) and int( + edge.target()) in returned_nodes and int(edge.source()) in returned_nodes: returned_edges.add((edge.source(), edge.target())) for node in accepted_candidates: if node in intermediate_nodes: intermediate_nodes.remove(node) subgraph = { - "nodes":[g.vertex_properties[node_name_attribute][node] for node in returned_nodes], - "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges], - } + "nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes], + "edges": [{"from": g.vertex_properties[node_name_attribute][source], + "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges], + } # Compute node attributes. - node_types = {g.vertex_properties[node_name_attribute][node]: g.vertex_properties["type"][node] for node in returned_nodes} + node_types = {g.vertex_properties[node_name_attribute][node]: g.vertex_properties["type"][node] for node in + returned_nodes} is_seed = {g.vertex_properties[node_name_attribute][node]: node in set(seed_ids) for node in returned_nodes} returned_scores = {g.vertex_properties[node_name_attribute][node]: scores[node] for node in returned_nodes}