From 7bddf68ef02edb854f5eae02ee48a50ffd257674 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Tue, 12 Jul 2022 13:07:15 +0200
Subject: [PATCH] remove unconnected nodes from gt files

Former-commit-id: 4ae5d2b0bdf4ef0968837b6ec3f99c7d27802ad8
---
 drugstone/management/commands/make_graphs.py | 15 +++++++++++++--
 scripts/docker-entrypoint.sh                 |  5 ++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drugstone/management/commands/make_graphs.py b/drugstone/management/commands/make_graphs.py
index a6c3d81..7759ad3 100755
--- a/drugstone/management/commands/make_graphs.py
+++ b/drugstone/management/commands/make_graphs.py
@@ -13,6 +13,7 @@ django.setup()
 
 KERNEL = 6
 
+
 def _internal_expression_scores(drugstone_id: str) -> dict:
     """ Looks up the tissue specific expression scores for a given protein.
     The scores are loaded from the django database.
@@ -38,6 +39,7 @@ def _internal_expression_scores(drugstone_id: str) -> dict:
 
     return tissue_scores
 
+
 def _internal_pdis(dataset_name: str) -> List[dict]:
     """ Fetches all internal protein-drug interactions for a given dataset.
     Interactions are taken from the django database.
@@ -57,6 +59,7 @@ def _internal_pdis(dataset_name: str) -> List[dict]:
 
     return node_node_interactions
 
+
 def _internal_ppis(dataset_name: str) -> List[dict]:
     """ Fetches all internal protein-protein interactions for a given dataset.
     Interactions are taken from the django database.
@@ -92,7 +95,7 @@ def create_gt(params: Tuple[str, str]) -> None:
     print(f'loading nodes')
     data['nodes'] = serializers.ProteinSerializer(many=True).to_representation(
         models.Protein.objects.all()
-    ) 
+    )
 
     print(f'loading edges/{ppi_dataset}')
     data['edges'] = _internal_ppis(ppi_dataset)
@@ -100,7 +103,7 @@ def create_gt(params: Tuple[str, str]) -> None:
     print(f'loading drugs')
     data['drugs'] = serializers.DrugSerializer(many=True).to_representation(
         models.Drug.objects.all()
-    ) 
+    )
     print(f'loading drug_edges/{pdi_dataset}')
     data['drug_edges'] = _internal_pdis(pdi_dataset)
 
@@ -175,6 +178,14 @@ def create_gt(params: Tuple[str, str]) -> None:
         e_type[e] = 'drug-protein'
     print("done with drug edges")
 
+    # remove unconnected nodes
+    # iter_vertices() yields plain integer indices, which have no
+    # out_degree(); g.vertices() yields Vertex descriptors instead.
+    delete_vertices = {
+        int(vertex) for vertex in g.vertices() if vertex.out_degree() == 0
+    }
+    g.remove_vertex(reversed(sorted(delete_vertices)), fast=True)
+
     # save graph
     filename = f"./data/Networks/internal_{ppi_dataset}_{pdi_dataset}.gt"
     g.save(filename)
diff --git a/scripts/docker-entrypoint.sh b/scripts/docker-entrypoint.sh
index efb99a9..43bf32d 100755
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -4,8 +4,7 @@ python3 manage.py makemigrations drugstone
 python3 manage.py migrate
 python3 manage.py createfixtures
 python3 manage.py cleanuptasks
-python3 manage.py populate_db --update -p
-#python3 manage.py populate_db --update -a
-#python3 manage.py make_graphs
+python3 manage.py populate_db --update -a
+python3 manage.py make_graphs
 
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
-- 
GitLab