From 11961adb4fd456fb07c7cd29e2e0f88a5e9b54e1 Mon Sep 17 00:00:00 2001
From: AndiMajore <andi.majore@googlemail.com>
Date: Thu, 7 Jul 2022 21:30:26 +0200
Subject: [PATCH] tried to simplify read_graph_tool_graph

Former-commit-id: 4b09a9730d74bd339303f6ae23907c779c254472
---
 tasks/util/read_graph_tool_graph.py | 45 +++++++++++++++++------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/tasks/util/read_graph_tool_graph.py b/tasks/util/read_graph_tool_graph.py
index d86de27..94db075 100755
--- a/tasks/util/read_graph_tool_graph.py
+++ b/tasks/util/read_graph_tool_graph.py
@@ -4,35 +4,35 @@ import graph_tool.topology as gtt
 # def read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits=False, include_indirect_drugs=False, include_non_approved_drugs=False):
 def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False, include_non_approved_drugs=False, target='drug'):
     r"""Reads a graph-tool graph from file.
-    
-    Reads a graph-tool graph from graphml or gt file and returns is along 
+
+    Reads a graph-tool graph from graphml or gt file and returns is along
     with the internal IDs of the seed and viral seeds and the drugs.
-    
+
     Parameters
     ----------
     file_path : str
       A string specifying the path to a graphml or gt file.
-      
+
     seeds : list of str
       A list of drugstone IDs identifying the seed seeds.
-      
+
     include_indirect_drugs : bool
       If True, edges from non-seed host proteins to drugs are ignored when ranking drugs.
-      
+
     include_non_approved_drugs : bool
       If True, also non-approved drugs are included in the analysis
 
     target : str
       A string specifying the target of the search, either "drug" or "drug-target"
-      
+
     Returns
     -------
     g : graph_tool.Graph
       The constructed graph.
-      
+
     seed_ids : list of int
       The graph indices for all seed nodes
-      
+
     drug_ids : list of int
       The graph indices for all drug nodes
     """
@@ -40,7 +40,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
 
     g = gt.load_graph(file_path)
     # g = gtt.extract_largest_component(gg, directed=False, prune=True)   # this line is added since we need to work with the LCC of the graphs for all algorithms
-  
+
     # drug_protein = "DrugHasTarget"
     d_type = "drug"
     node_name_attribute = "drugstone_id"  # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
@@ -54,7 +54,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
         elif target != 'drug' and g.vertex_properties["type"][node] == d_type:
             deleted_nodes.append(node)
     g.remove_vertex(deleted_nodes, fast=True)
-        
+
     # Retrieve internal IDs of seed_ids and viral_protein_ids.
     seeds = set(seeds)
     seed_ids = []
@@ -72,12 +72,12 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
                 drug_groups = g.vertex_properties["status"][node].split(', ')
                 if "approved" in drug_groups:
                     drug_ids.append(node)
-            
+
     # Check that all seed seeds have been matched and throw error, otherwise.
     for protein, found in is_matched.items():
         if not found:
             raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
-    
+
     # Delete edges that should be ignored or are not contained in the selected dataset.
     deleted_edges = []
     if (drug_ids and not include_indirect_drugs):  # If only_direct_drugs should be included, remove any drug-protein edges that the drug is not a direct neighbor of any seeds
@@ -91,13 +91,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
             print(int(drug))
         for edge in g.edges():
             if g.edge_properties["type"][edge] == 'drug-protein':
-                if g.vertex_properties["type"][edge.target()] == d_type and edge.target() not in direct_drugs:
-                    deleted_edges.append(edge)
-                    if int(edge.target()) in drug_ids:
+                if g.vertex_properties["type"][edge.target()] == d_type:
+                    indir_drug = edge.target() not in direct_drugs
+                    not_seed = edge.source() not in seed_ids
+                    if indir_drug or not_seed:
+                        deleted_edges.append(edge)
+                    if indir_drug and int(edge.target()) in drug_ids:
                         drug_ids.remove(int(edge.target()))
-                elif g.vertex_properties["type"][edge.source()] == d_type and edge.source() not in direct_drugs:
-                    deleted_edges.append(edge)
-                    if int(edge.source()) in drug_ids:
+
+                elif g.vertex_properties["type"][edge.source()] == d_type and edge.source() not in direct_drugs or edge.target() not in seed_ids:
+                    indir_drug = edge.source() not in direct_drugs
+                    not_seed = edge.target() not in seed_ids
+                    if indir_drug or not_seed:
+                        deleted_edges.append(edge)
+                    if indir_drug and int(edge.source()) in drug_ids:
                         drug_ids.remove(int(edge.source()))
 
     g.set_fast_edge_removal(fast=True)
-- 
GitLab