Skip to content
Snippets Groups Projects
Commit 62f066c9 authored by AndiMajore's avatar AndiMajore
Browse files

tried to simplify read_graph_tool_graph

Former-commit-id: b30ddcce5a74933259b2d844f3576058274aa881 [formerly 3ffcd24d7bd7ab56f2a38772eb665da6ff7af3c6]
Former-commit-id: 5a3c12455d37f3ad1b696e60d178c5f2a4e66e3e
parent 22713704
No related branches found
No related tags found
No related merge requests found
...@@ -4,35 +4,35 @@ import graph_tool.topology as gtt ...@@ -4,35 +4,35 @@ import graph_tool.topology as gtt
# def read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits=False, include_indirect_drugs=False, include_non_approved_drugs=False): # def read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits=False, include_indirect_drugs=False, include_non_approved_drugs=False):
def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False, include_non_approved_drugs=False, target='drug'): def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False, include_non_approved_drugs=False, target='drug'):
r"""Reads a graph-tool graph from file. r"""Reads a graph-tool graph from file.
Reads a graph-tool graph from graphml or gt file and returns it along Reads a graph-tool graph from graphml or gt file and returns it along
with the internal IDs of the seed and viral seeds and the drugs. with the internal IDs of the seed and viral seeds and the drugs.
Parameters Parameters
---------- ----------
file_path : str file_path : str
A string specifying the path to a graphml or gt file. A string specifying the path to a graphml or gt file.
seeds : list of str seeds : list of str
A list of drugstone IDs identifying the seed nodes. A list of drugstone IDs identifying the seed nodes.
include_indirect_drugs : bool include_indirect_drugs : bool
If False, edges from non-seed host proteins to drugs are ignored when ranking drugs. If False, edges from non-seed host proteins to drugs are ignored when ranking drugs.
include_non_approved_drugs : bool include_non_approved_drugs : bool
If True, also non-approved drugs are included in the analysis If True, also non-approved drugs are included in the analysis
target : str target : str
A string specifying the target of the search, either "drug" or "drug-target" A string specifying the target of the search, either "drug" or "drug-target"
Returns Returns
------- -------
g : graph_tool.Graph g : graph_tool.Graph
The constructed graph. The constructed graph.
seed_ids : list of int seed_ids : list of int
The graph indices for all seed nodes The graph indices for all seed nodes
drug_ids : list of int drug_ids : list of int
The graph indices for all drug nodes The graph indices for all drug nodes
""" """
...@@ -40,7 +40,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -40,7 +40,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
g = gt.load_graph(file_path) g = gt.load_graph(file_path)
# g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms # g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms
# drug_protein = "DrugHasTarget" # drug_protein = "DrugHasTarget"
d_type = "drug" d_type = "drug"
node_name_attribute = "drugstone_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute node_name_attribute = "drugstone_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
...@@ -54,7 +54,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -54,7 +54,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
elif target != 'drug' and g.vertex_properties["type"][node] == d_type: elif target != 'drug' and g.vertex_properties["type"][node] == d_type:
deleted_nodes.append(node) deleted_nodes.append(node)
g.remove_vertex(deleted_nodes, fast=True) g.remove_vertex(deleted_nodes, fast=True)
# Retrieve internal IDs of seed_ids and viral_protein_ids. # Retrieve internal IDs of seed_ids and viral_protein_ids.
seeds = set(seeds) seeds = set(seeds)
seed_ids = [] seed_ids = []
...@@ -72,12 +72,12 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -72,12 +72,12 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
drug_groups = g.vertex_properties["status"][node].split(', ') drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in drug_groups: if "approved" in drug_groups:
drug_ids.append(node) drug_ids.append(node)
# Check that all seed seeds have been matched and throw error, otherwise. # Check that all seed seeds have been matched and throw error, otherwise.
for protein, found in is_matched.items(): for protein, found in is_matched.items():
if not found: if not found:
raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path)) raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
# Delete edges that should be ignored or are not contained in the selected dataset. # Delete edges that should be ignored or are not contained in the selected dataset.
deleted_edges = [] deleted_edges = []
if (drug_ids and not include_indirect_drugs): # If only_direct_drugs should be included, remove any drug-protein edges that the drug is not a direct neighbor of any seeds if (drug_ids and not include_indirect_drugs): # If only_direct_drugs should be included, remove any drug-protein edges that the drug is not a direct neighbor of any seeds
...@@ -91,13 +91,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -91,13 +91,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
print(int(drug)) print(int(drug))
for edge in g.edges(): for edge in g.edges():
if g.edge_properties["type"][edge] == 'drug-protein': if g.edge_properties["type"][edge] == 'drug-protein':
if g.vertex_properties["type"][edge.target()] == d_type and edge.target() not in direct_drugs: if g.vertex_properties["type"][edge.target()] == d_type:
deleted_edges.append(edge) indir_drug = edge.target() not in direct_drugs
if int(edge.target()) in drug_ids: not_seed = edge.source() not in seed_ids
if indir_drug or not_seed:
deleted_edges.append(edge)
if indir_drug and int(edge.target()) in drug_ids:
drug_ids.remove(int(edge.target())) drug_ids.remove(int(edge.target()))
elif g.vertex_properties["type"][edge.source()] == d_type and edge.source() not in direct_drugs:
deleted_edges.append(edge) elif g.vertex_properties["type"][edge.source()] == d_type and edge.source() not in direct_drugs or edge.target() not in seed_ids:
if int(edge.source()) in drug_ids: indir_drug = edge.source() not in direct_drugs
not_seed = edge.target() not in seed_ids
if indir_drug or not_seed:
deleted_edges.append(edge)
if indir_drug and int(edge.source()) in drug_ids:
drug_ids.remove(int(edge.source())) drug_ids.remove(int(edge.source()))
g.set_fast_edge_removal(fast=True) g.set_fast_edge_removal(fast=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment