Commit 52f31b01 authored by AndiMajore

Small update; not everything is fixed yet.

parent 8f6295d4
@@ -27,7 +27,6 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
 RUN pip install /usr/src/drugstone/python_nedrex/
-RUN mkdir store
 COPY . /usr/src/drugstone/
@@ -14,7 +14,6 @@ services:
     volumes:
       - drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations
       - drugstone_data_volume:/usr/src/drugstone/data
-      - drugstone_store_volume:/usr/src/drugstone/store
     ports:
       - 8001:8000
     networks:
@@ -102,5 +101,4 @@ volumes:
   drugstone_db_schema_volume:
     # external: true
   drugstone_data_volume:
-    # external: true
-  drugstone_store_volume:
\ No newline at end of file
+    # external: true
\ No newline at end of file
@@ -198,7 +198,7 @@ class NedrexImporter:
                         e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
                         if not update or e.__hash__() not in existing:
                             bulk.add(e)
-                        for source in edge['databases']:
+                        for source in edge['dataSources']:
                             bulk.add(models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein))
                     except KeyError:
                         pass
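The key rename from 'databases'/'assertedBy' to 'dataSources' repeats across the four importer hunks in this commit. A minimal sketch of the shared access pattern, with hypothetical stand-ins (`edges`, `make_interaction`) for the NeDRex edge payload and the Django model constructors:

    def collect_interactions(edges, source_datasets, make_interaction):
        # `edges` and `make_interaction` are hypothetical stand-ins; the real
        # importer iterates NeDRex edge dicts and builds Django model instances.
        bulk = set()
        for edge in edges:
            try:
                # One record per source database listed under the renamed key.
                for source in edge['dataSources']:
                    bulk.add(make_interaction(source_datasets[source]))
            except KeyError:
                # Edges missing the key, or naming an unknown source, are
                # skipped, mirroring the importer's bare `except KeyError: pass`.
                pass
        return bulk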
@@ -237,7 +237,7 @@ class NedrexImporter:
                         e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2)
                         if not update or e.__hash__() not in existing:
                             bulk.append(e)
-                        for source in edge['assertedBy']:
+                        for source in edge['dataSources']:
                             bulk.append(
                                 models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1,
                                                                  to_protein=protein2))
@@ -268,7 +268,7 @@ class NedrexImporter:
                                                                score=edge['score'])
                         if not update or e.__hash__() not in existing:
                             bulk.add(e)
-                        for source in edge['assertedBy']:
+                        for source in edge['dataSources']:
                             bulk.add(
                                 models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder,
                                                                   score=edge['score']))
@@ -298,7 +298,7 @@ class NedrexImporter:
                         e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
                         if not update or e.__hash__() not in existing:
                             bulk.add(e)
-                        for source in edge['assertedBy']:
+                        for source in edge['dataSources']:
                             bulk.add(
                                 models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder))
                     except KeyError:
+from requests.exceptions import RetryError
 from drugstone import models
 from python_nedrex.static import get_metadata
@@ -23,28 +25,46 @@ def get_ppi_apid():
 def get_ppi_nedrex_biogrid(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['biogrid']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='BioGRID',
         link=url,
-        version=get_metadata()['source_databases']['biogrid']['date']
+        version=version
     )
     return dataset


 def get_ppi_nedrex_iid(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['iid']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='IID',
         link=url,
-        version=get_metadata()['source_databases']['iid']['date']
+        version=version
     )
     return dataset


 def get_ppi_nedrex_intact(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['intact']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='IntAct',
         link=url,
-        version=get_metadata()['source_databases']['intact']['date']
+        version=version
     )
     return dataset
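Every accessor in this file now repeats the same fallback: start from get_today_version() and overwrite it with the NeDRex metadata date unless the API is unreachable. A hypothetical helper (`nedrex_version`, not part of this commit) that captures the pattern once, assuming the module's own get_metadata and get_today_version:

    from requests.exceptions import RetryError

    def nedrex_version(*keys):
        # Walk `keys` into the NeDRex metadata, e.g.
        # nedrex_version('source_databases', 'biogrid', 'date'),
        # falling back to the dated placeholder when the API is unreachable.
        version = get_today_version()
        try:
            value = get_metadata()
            for key in keys:
                value = value[key]
            version = value
        except RetryError:
            pass
        return version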
@@ -59,37 +79,61 @@ def get_ppi_biogrid():
 def get_drug_target_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version']
+    except RetryError:
+        pass
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset


 def get_ppi_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset


 def get_protein_disorder_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version']
+    except RetryError:
+        pass
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset


 def get_drug_disorder_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version']
+    except RetryError:
+        pass
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset
@@ -138,62 +182,103 @@ def get_drug_disorder_drugbank():
     )
     return dataset


+def get_today_version():
+    import datetime
+    now = datetime.date.today()
+    # Placeholder used while the NeDRex metadata is unreachable,
+    # e.g. '2024-3-5_temp' (month and day are not zero-padded).
+    version = f'{now.year}-{now.month}-{now.day}_temp'
+    return version
+
+
 def get_dis_prot_nedrex_disgenet(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['disgenet']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='DisGeNET',
         link=url,
-        version=get_metadata()['source_databases']['disgenet']['date']
+        version=version
     )
     return dataset


 def get_dis_prot_nedrex_omim(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['omim']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='OMIM',
         link=url,
-        version=get_metadata()['source_databases']['omim']['date']
+        version=version
     )
     return dataset


 def get_drdis_nedrex_drugcentral(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['drug_central']['date']
+    except RetryError:
+        pass
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='Drug Central',
         link=url,
-        version=get_metadata()['source_databases']['drug_central']['date']
+        version=version
     )
     return dataset


 def get_drdis_nedrex_ctd(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['ctd']['date']
+    except RetryError:
+        pass
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='CTD',
         link=url,
-        version=get_metadata()['source_databases']['ctd']['date']
+        version=version
     )
     return dataset


 def get_pdr_nedrex_drugcentral(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['drug_central']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='Drug Central',
         link=url,
-        version=get_metadata()['source_databases']['drug_central']['date']
+        version=version
     )
     return dataset


 def get_pdr_nedrex_drugbank(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['drugbank']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='DrugBank',
         link=url,
-        version=get_metadata()['source_databases']['drugbank']['date']
+        version=version
     )
     return dataset


 def get_pdr_nedrex_datasets(url):
-    return {'DrugBank': get_pdr_nedrex_drugbank(url), 'DrugCentral': get_pdr_nedrex_drugcentral(url)}
+    return {'drugbank': get_pdr_nedrex_drugbank(url), 'drugcentral': get_pdr_nedrex_drugcentral(url)}


 def get_drdis_nedrex_datasets(url):
-    return {'ctd':get_drdis_nedrex_ctd(url), 'Drug Central':get_drdis_nedrex_drugcentral(url)}
+    return {'ctd':get_drdis_nedrex_ctd(url), 'drugcentral':get_drdis_nedrex_drugcentral(url)}


 def get_ppi_nedrex_datasets(url):
     return {'biogrid':get_ppi_nedrex_biogrid(url), 'iid':get_ppi_nedrex_iid(url), 'intact':get_ppi_nedrex_intact(url)}
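The placeholder format from get_today_version() is not zero-padded, so placeholder versions do not sort lexicographically against ISO-style dates. A zero-padded alternative (a sketch, not what the commit does):

    import datetime

    def get_today_version_padded():
        # ISO-style, zero-padded placeholder, e.g. '2024-03-05_temp'
        return datetime.date.today().strftime('%Y-%m-%d_temp')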
@@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone
 python3 manage.py migrate
 python3 manage.py createfixtures
 python3 manage.py cleanuptasks
-python3 manage.py populate_db --update -a
+#python3 manage.py populate_db --update -a
 #python3 manage.py make_graphs
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
@@ -45,6 +45,7 @@ def quick_task(task_hook: TaskHook):
         "result_size": 20,
         "include_non_approved_drugs": True,
         "include_indirect_drugs": False,
+        "target":"drug"
     })
     tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result)
@@ -200,8 +200,6 @@ def trust_rank(task_hook: TaskHook):
     task_hook.set_progress(0 / 4.0, "Parsing input.")
     file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
     g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
-    print(seed_ids)
-    print(drug_ids)
     task_hook.set_progress(1 / 4.0, "Computing edge weights.")
     weights = edge_weights(g, hub_penalty, inverse=True)
@@ -37,7 +37,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
     The graph indices for all drug nodes
     """
     # Read the graph.
+    print(f"loading {file_path} for {target}")
     g = gt.load_graph(file_path)
     # g = gtt.extract_largest_component(gg, directed=False, prune=True)  # this line is added since we need to work with the LCC of the graphs for all algorithms
@@ -47,16 +47,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
     # Delete all nodes that are not contained in the selected datasets and have degrees higher than max_deg
     deleted_nodes = []
     for node in range(g.num_vertices()):
+        # Remove all unconnected nodes. TODO: probably skip these already when creating the .gt files
+        if g.vertex(node).out_degree() == 0 and target == 'drug':
+            deleted_nodes.append(node)
         # if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
-        if not g.vertex_properties[node_name_attribute][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
+        elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and (g.vertex(node).out_degree() > max_deg):
             deleted_nodes.append(node)
         # remove all drugs from the graph if we are not looking for drugs
         elif target != 'drug' and g.vertex_properties["type"][node] == d_type:
             deleted_nodes.append(node)
     g.remove_vertex(deleted_nodes, fast=True)

-    # Retrieve internal IDs of seed_ids and viral_protein_ids.
+    # Retrieve internal IDs of seed_ids
     seeds = set(seeds)
+    print(seeds)
     seed_ids = []
     drug_ids = []
     is_matched = {protein: False for protein in seeds}
@@ -64,7 +68,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
         node_type = g.vertex_properties["type"][node]
         if g.vertex_properties[node_name_attribute][node] in seeds:
             seed_ids.append(node)
-            is_matched[g.vertex_properties[node_name_attribute][node]] = True
+            is_matched[g.vertex_properties[node_name_attribute][node]] = node
         if node_type == d_type:
            if include_non_approved_drugs:
                 drug_ids.append(node)
@@ -74,9 +78,11 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
                 drug_ids.append(node)

     # Check that all seeds have been matched and throw an error otherwise.
+    # print(deleted_nodes)
+    print(seed_ids)
     for protein, found in is_matched.items():
         if not found:
-            raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
+            raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path))

     # Delete edges that should be ignored or are not contained in the selected dataset.
     deleted_edges = []
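One caveat with the is_matched change above: it now stores the vertex index instead of True, and vertex 0 is falsy, so `if not found` would wrongly reject a seed that matched vertex 0. A sketch of a safer check (an observation, not a change made in this commit):

    def check_all_matched(is_matched, file_path):
        # `is_matched` maps seed name -> vertex index, or None if unmatched.
        # Testing against None keeps vertex 0 valid.
        for protein, node in is_matched.items():
            if node is None:
                raise ValueError(
                    "Invalid seed protein {}. No node named {} in {}.".format(
                        protein, protein, file_path))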
@@ -87,8 +93,6 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
                 direct_drugs.add(edge.target())
             elif g.vertex_properties["type"][edge.source()] == d_type and edge.target() in seed_ids:
                 direct_drugs.add(edge.source())
-        for drug in direct_drugs:
-            print(int(drug))
         for edge in g.edges():
             if g.edge_properties["type"][edge] == 'drug-protein':
                 if g.vertex_properties["type"][edge.target()] == d_type:
@@ -113,6 +117,17 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
     for edge in deleted_edges:
         g.remove_edge(edge)
     g.set_fast_edge_removal(fast=False)
+    print("Drugs")
+    print(drug_ids)
+    print("Vertices")
+    vertices = 0
+    for _ in g.vertices():
+        vertices += 1
+    print(f'\t{vertices}')
+    print("Edges")
+    edges = 0
+    for _ in g.edges():
+        edges += 1
+    print(f'\t{edges}')

     # Return the graph and the indices of the seed_ids and the seeds.
     return g, seed_ids, drug_ids
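Side note on the debug block above: graph-tool already exposes these counts directly, so the counting loops can be replaced one-for-one (same output, assuming the same Graph g):

    print("Drugs")
    print(drug_ids)
    print("Vertices")
    print(f'\t{g.num_vertices()}')
    print("Edges")
    print(f'\t{g.num_edges()}')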
@@ -23,6 +23,7 @@ def scores_to_results(
     else:
         candidates = [(node, scores[node]) for node in range(g.num_vertices()) if scores[node] > 0 and node not in set(seed_ids)]
     best_candidates = [item[0] for item in sorted(candidates, key=lambda item: item[1], reverse=True)[:result_size]]
+    print(f'Candidate list length: {len(best_candidates)}')
     # Concatenate the best candidates with the seeds and compute the induced subgraph.
     # Since result_size filters out nodes, the result network is no longer complete.
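A note on the selection above: sorting all candidates costs O(n log n), while heapq.nlargest returns the same top result_size entries in O(n log k). A sketch under the same variable names (not part of the commit):

    import heapq

    def top_candidates(scores, seed_ids, result_size):
        # `scores` is a sequence indexed by node, as in the list comprehension above.
        seeds = set(seed_ids)
        candidates = ((node, s) for node, s in enumerate(scores)
                      if s > 0 and node not in seeds)
        return [node for node, _ in
                heapq.nlargest(result_size, candidates, key=lambda item: item[1])]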
@@ -82,6 +83,7 @@ def scores_to_results(
     for edge in edges:
         if ((edge.source(), edge.target()) not in returned_edges) or ((edge.target(), edge.source()) not in returned_edges):
             returned_edges.add((edge.source(), edge.target()))
+    print(f'Returned nodes number: {len(returned_nodes)}')
     subgraph = {
         "nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes],
         "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges],