Skip to content
Snippets Groups Projects
Commit 4b09a973 authored by AndiMajore's avatar AndiMajore
Browse files

tried to simplify read_graph_tool_graph

parent f162be13
No related branches found
No related tags found
No related merge requests found
......@@ -4,35 +4,35 @@ import graph_tool.topology as gtt
# def read_graph_tool_graph(file_path, seeds, datasets, ignored_edge_types, max_deg, ignore_non_seed_baits=False, include_indirect_drugs=False, include_non_approved_drugs=False):
def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False, include_non_approved_drugs=False, target='drug'):
r"""Reads a graph-tool graph from file.
Reads a graph-tool graph from graphml or gt file and returns is along
Reads a graph-tool graph from graphml or gt file and returns is along
with the internal IDs of the seed and viral seeds and the drugs.
Parameters
----------
file_path : str
A string specifying the path to a graphml or gt file.
seeds : list of str
A list of drugstone IDs identifying the seed seeds.
include_indirect_drugs : bool
If True, edges from non-seed host proteins to drugs are ignored when ranking drugs.
include_non_approved_drugs : bool
If True, also non-approved drugs are included in the analysis
target : str
A string specifying the target of the search, either "drug" or "drug-target"
Returns
-------
g : graph_tool.Graph
The constructed graph.
seed_ids : list of int
The graph indices for all seed nodes
drug_ids : list of int
The graph indices for all drug nodes
"""
......@@ -40,7 +40,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
g = gt.load_graph(file_path)
# g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms
# drug_protein = "DrugHasTarget"
d_type = "drug"
node_name_attribute = "drugstone_id" # nodes in the input network which is created from RepoTrialDB have primaryDomainId as name attribute
......@@ -54,7 +54,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
elif target != 'drug' and g.vertex_properties["type"][node] == d_type:
deleted_nodes.append(node)
g.remove_vertex(deleted_nodes, fast=True)
# Retrieve internal IDs of seed_ids and viral_protein_ids.
seeds = set(seeds)
seed_ids = []
......@@ -72,12 +72,12 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
drug_groups = g.vertex_properties["status"][node].split(', ')
if "approved" in drug_groups:
drug_ids.append(node)
# Check that all seed seeds have been matched and throw error, otherwise.
for protein, found in is_matched.items():
if not found:
raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
# Delete edges that should be ignored or are not contained in the selected dataset.
deleted_edges = []
if (drug_ids and not include_indirect_drugs): # If only_direct_drugs should be included, remove any drug-protein edges that the drug is not a direct neighbor of any seeds
......@@ -91,13 +91,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
print(int(drug))
for edge in g.edges():
if g.edge_properties["type"][edge] == 'drug-protein':
if g.vertex_properties["type"][edge.target()] == d_type and edge.target() not in direct_drugs:
deleted_edges.append(edge)
if int(edge.target()) in drug_ids:
if g.vertex_properties["type"][edge.target()] == d_type:
indir_drug = edge.target() not in direct_drugs
not_seed = edge.source() not in seed_ids
if indir_drug or not_seed:
deleted_edges.append(edge)
if indir_drug and int(edge.target()) in drug_ids:
drug_ids.remove(int(edge.target()))
elif g.vertex_properties["type"][edge.source()] == d_type and edge.source() not in direct_drugs:
deleted_edges.append(edge)
if int(edge.source()) in drug_ids:
elif g.vertex_properties["type"][edge.source()] == d_type and edge.source() not in direct_drugs or edge.target() not in seed_ids:
indir_drug = edge.source() not in direct_drugs
not_seed = edge.target() not in seed_ids
if indir_drug or not_seed:
deleted_edges.append(edge)
if indir_drug and int(edge.source()) in drug_ids:
drug_ids.remove(int(edge.source()))
g.set_fast_edge_removal(fast=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment