Commit 668a9136 authored by AndiMajore

Small update; not everything is fixed yet.

Former-commit-id: 52f31b01
parent befd09e3
@@ -27,7 +27,6 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
 RUN pip install /usr/src/drugstone/python_nedrex/
-RUN mkdir store
 COPY . /usr/src/drugstone/
......@@ -14,7 +14,6 @@ services:
volumes:
- drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations
- drugstone_data_volume:/usr/src/drugstone/data
- drugstone_store_volume:/usr/src/drugstone/store
ports:
- 8001:8000
networks:
@@ -102,5 +101,4 @@ volumes:
   drugstone_db_schema_volume:
   # external: true
   drugstone_data_volume:
-  # external: true
-  drugstone_store_volume:
\ No newline at end of file
+  # external: true
\ No newline at end of file
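With the store directory gone from both the Dockerfile and the compose file, only two named volumes remain. A minimal sketch of the resulting volumes block, assuming nothing else in the compose file still references a volume:

volumes:
  drugstone_db_schema_volume:
  # external: true
  drugstone_data_volume:
  # external: true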
@@ -198,7 +198,7 @@ class NedrexImporter:
                 e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
-                for source in edge['databases']:
+                for source in edge['dataSources']:
                     bulk.add(models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein))
             except KeyError:
                 pass
@@ -237,7 +237,7 @@ class NedrexImporter:
                 e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2)
                 if not update or e.__hash__() not in existing:
                     bulk.append(e)
-                for source in edge['assertedBy']:
+                for source in edge['dataSources']:
                     bulk.append(
                         models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1,
                                                          to_protein=protein2))
@@ -268,7 +268,7 @@ class NedrexImporter:
                                                       score=edge['score'])
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
-                for source in edge['assertedBy']:
+                for source in edge['dataSources']:
                     bulk.add(
                         models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder,
                                                           score=edge['score']))
@@ -298,7 +298,7 @@ class NedrexImporter:
                 e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
                 if not update or e.__hash__() not in existing:
                     bulk.add(e)
-                for source in edge['assertedBy']:
+                for source in edge['dataSources']:
                     bulk.add(
                         models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder))
             except KeyError:
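All four importer loops above now read the renamed NeDRex edge field dataSources in place of the older databases/assertedBy keys. If mixed dumps ever need to be supported, a small compatibility accessor could absorb the rename; this is a sketch, and the helper name and fallback order are assumptions, not part of this commit:

def edge_sources(edge):
    # Hypothetical shim: prefer the new NeDRex key, fall back to legacy names.
    for key in ('dataSources', 'assertedBy', 'databases'):
        if key in edge:
            return edge[key]
    return []

Each loop would then read for source in edge_sources(edge): with no further changes.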
+from requests.exceptions import RetryError
 from drugstone import models
 from python_nedrex.static import get_metadata
@@ -23,28 +25,46 @@ def get_ppi_apid():
 def get_ppi_nedrex_biogrid(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['biogrid']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='BioGRID',
         link=url,
-        version=get_metadata()['source_databases']['biogrid']['date']
+        version=version
     )
     return dataset

 def get_ppi_nedrex_iid(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['iid']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='IID',
         link=url,
-        version=get_metadata()['source_databases']['iid']['date']
+        version=version
     )
     return dataset

 def get_ppi_nedrex_intact(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['intact']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='IntAct',
         link=url,
-        version=get_metadata()['source_databases']['intact']['date']
+        version=version
     )
     return dataset
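The three getters repeat the same fallback: seed version with a temporary date stamp, then overwrite it from the NeDRex metadata unless the API's retries are exhausted. A sketch of a shared helper that would collapse the repetition (the helper name and the extra KeyError guard are assumptions):

def metadata_version_or_today(*keys):
    # Walk get_metadata() along the given keys, e.g.
    # ('source_databases', 'biogrid', 'date'); fall back to the
    # temporary date stamp when the API is unreachable.
    version = get_today_version()
    try:
        value = get_metadata()
        for key in keys:
            value = value[key]
        version = value
    except (RetryError, KeyError):
        pass
    return version

Each getter would then reduce to a single call, e.g. version=metadata_version_or_today('source_databases', 'biogrid', 'date').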
@@ -59,37 +79,61 @@ def get_ppi_biogrid():
 def get_drug_target_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version'],
+    except RetryError:
+        pass
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset

 def get_ppi_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version'],
+    except RetryError:
+        pass
     dataset, _ = models.PPIDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset

 def get_protein_disorder_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version'],
+    except RetryError:
+        pass
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='NeDRex',
         link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset

 def get_drug_disorder_nedrex(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['version'],
+    except RetryError:
+        pass
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='NeDRex',
        link=url,
-        version=get_metadata()['version'],
+        version=version
     )
     return dataset
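One caveat in the four NeDRex getters above: the new assignment version = get_metadata()['version'], keeps the trailing comma that was harmless as a keyword argument, so version now becomes a one-element tuple rather than a string. The fix is simply to drop the comma:

    try:
        version = get_metadata()['version']  # no trailing comma: keep the plain string
    except RetryError:
        pass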
@@ -138,62 +182,103 @@ def get_drug_disorder_drugbank():
     )
     return dataset

+def get_today_version():
+    import datetime
+    now = datetime.date.today()
+    version = f'{now.year}-{now.month}-{now.day}_temp'
+    return version

 def get_dis_prot_nedrex_disgenet(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['disgenet']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='DisGeNET',
         link=url,
-        version=get_metadata()['source_databases']['disgenet']['date']
+        version=version
     )
     return dataset

 def get_dis_prot_nedrex_omim(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['omim']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDisDataset.objects.get_or_create(
         name='OMIM',
         link=url,
-        version=get_metadata()['source_databases']['omim']['date']
+        version=version
     )
     return dataset

 def get_drdis_nedrex_drugcentral(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['drug_central']['date']
+    except RetryError:
+        pass
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='Drug Central',
         link=url,
-        version=get_metadata()['source_databases']['drug_central']['date']
+        version=version
     )
     return dataset

 def get_drdis_nedrex_ctd(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['ctd']['date']
+    except RetryError:
+        pass
     dataset, _ = models.DrDiDataset.objects.get_or_create(
         name='CTD',
         link=url,
-        version=get_metadata()['source_databases']['ctd']['date']
+        version=version
     )
     return dataset

 def get_pdr_nedrex_drugcentral(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['drug_central']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='Drug Central',
         link=url,
-        version=get_metadata()['source_databases']['drug_central']['date']
+        version=version
     )
     return dataset

 def get_pdr_nedrex_drugbank(url):
+    version = get_today_version()
+    try:
+        version = get_metadata()['source_databases']['drugbank']['date']
+    except RetryError:
+        pass
     dataset, _ = models.PDIDataset.objects.get_or_create(
         name='DrugBank',
         link=url,
-        version=get_metadata()['source_databases']['drugbank']['date']
+        version=version
     )
     return dataset

 def get_pdr_nedrex_datasets(url):
-    return {'DrugBank': get_pdr_nedrex_drugbank(url), 'DrugCentral': get_pdr_nedrex_drugcentral(url)}
+    return {'drugbank': get_pdr_nedrex_drugbank(url), 'drugcentral': get_pdr_nedrex_drugcentral(url)}

 def get_drdis_nedrex_datasets(url):
-    return {'ctd':get_drdis_nedrex_ctd(url), 'Drug Central':get_drdis_nedrex_drugcentral(url)}
+    return {'ctd':get_drdis_nedrex_ctd(url), 'drugcentral':get_drdis_nedrex_drugcentral(url)}

 def get_ppi_nedrex_datasets(url):
     return {'biogrid':get_ppi_nedrex_biogrid(url), 'iid':get_ppi_nedrex_iid(url), 'intact':get_ppi_nedrex_intact(url)}
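A second caveat, on the new get_today_version helper: f'{now.year}-{now.month}-{now.day}_temp' does not zero-pad months or days, so placeholders such as 2022-1-5_temp will not sort lexicographically against real dates. A padded variant, if sortable placeholders matter:

import datetime

def get_today_version():
    # Zero-padded variant: yields 2022-01-05_temp instead of 2022-1-5_temp.
    return datetime.date.today().strftime('%Y-%m-%d') + '_temp'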
@@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone
 python3 manage.py migrate
 python3 manage.py createfixtures
 python3 manage.py cleanuptasks
-python3 manage.py populate_db --update -a
+#python3 manage.py populate_db --update -a
 #python3 manage.py make_graphs
 /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
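With populate_db commented out of the entrypoint, the container now starts without importing data; the import presumably has to be triggered by hand when needed, e.g. from a shell inside the backend container:

python3 manage.py populate_db --update -a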
@@ -45,6 +45,7 @@ def quick_task(task_hook: TaskHook):
         "result_size": 20,
         "include_non_approved_drugs": True,
         "include_indirect_drugs": False,
+        "target":"drug"
     })
     tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result)
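quick_task now pins the new "target" parameter to "drug" before delegating to the TrustRank hook. A sketch of how the receiving task could read it defensively, assuming TaskHook exposes the dict as .parameters and that "drug" is a sensible default:

# Hypothetical defensive read of the new parameter inside the task.
search_target = task_hook.parameters.get("target", "drug")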
@@ -200,8 +200,6 @@ def trust_rank(task_hook: TaskHook):
     task_hook.set_progress(0 / 4.0, "Parsing input.")
     file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
     g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
-    print(seed_ids)
-    print(drug_ids)
     task_hook.set_progress(1 / 4.0, "Computing edge weights.")
     weights = edge_weights(g, hub_penalty, inverse=True)
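The weights come from edge_weights(g, hub_penalty, inverse=True). For illustration only (this is not the project's actual implementation), an inverse hub-penalty weighting can be sketched like this: penalize edges that touch high-degree nodes, and invert so penalized edges carry little weight:

def edge_weights(g, hub_penalty, inverse=False):
    # Weight each edge by the combined degree of its endpoints;
    # inverse=True maps heavy penalties to small weights.
    weights = g.new_edge_property("double")
    for e in g.edges():
        deg = e.source().out_degree() + e.target().out_degree()
        w = 1.0 + hub_penalty * deg
        weights[e] = 1.0 / w if inverse else w
    return weights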
@@ -37,7 +37,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False,
         The graph indices for all drug nodes
     """
     # Read the graph.
+    print(f"loading {file_path} for {target}")
     g = gt.load_graph(file_path)
     # g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms
@@ -47,16 +47,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False,
     # Delete all nodes that are not contained in the selected datasets and have degrees higher than max_deg
     deleted_nodes = []
     for node in range(g.num_vertices()):
+        # Remove all unconnected nodes TODO probably already skip when creating .gt files
+        if g.vertex(node).out_degree() == 0 and target == 'drug':
+            deleted_nodes.append(node)
         # if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
-        if not g.vertex_properties[node_name_attribute][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
+        elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and (g.vertex(node).out_degree() > max_deg):
             deleted_nodes.append(node)
+        # remove all drugs from graph if we are not looking for drugs
+        elif target != 'drug' and g.vertex_properties["type"][node] == d_type:
+            deleted_nodes.append(node)
     g.remove_vertex(deleted_nodes, fast=True)
-    # Retrieve internal IDs of seed_ids and viral_protein_ids.
+    # Retrieve internal IDs of seed_ids
     seeds = set(seeds)
+    print(seeds)
     seed_ids = []
     drug_ids = []
     is_matched = {protein: False for protein in seeds}
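For large graphs, the per-vertex Python loop above could be replaced by graph-tool's vectorized degree queries; a sketch of just the unconnected-node check, assuming the same g and target are in scope:

import numpy as np

# Vectorized variant of the out_degree() == 0 test above.
out_degrees = g.get_out_degrees(g.get_vertices())
if target == 'drug':
    deleted_nodes = np.where(out_degrees == 0)[0].tolist()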
@@ -64,7 +68,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False,
         node_type = g.vertex_properties["type"][node]
         if g.vertex_properties[node_name_attribute][node] in seeds:
             seed_ids.append(node)
-            is_matched[g.vertex_properties[node_name_attribute][node]] = True
+            is_matched[g.vertex_properties[node_name_attribute][node]] = node
         if node_type == d_type:
             if include_non_approved_drugs:
                 drug_ids.append(node)
@@ -74,9 +78,11 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False,
                 drug_ids.append(node)
     # Check that all seeds have been matched and throw an error otherwise.
     # print(deleted_nodes)
+    print(seed_ids)
     for protein, found in is_matched.items():
         if not found:
-            raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
+            raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path))

     # Delete edges that should be ignored or are not contained in the selected dataset.
     deleted_edges = []
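Note that is_matched now stores the vertex index instead of True, while the check stays if not found: — vertex 0 is falsy, so a seed matched to the very first vertex would still raise. A safer sketch uses None as the sentinel (variable names as in the surrounding function):

is_matched = {protein: None for protein in seeds}
for node in seed_ids:
    is_matched[g.vertex_properties[node_name_attribute][node]] = node
missing = [protein for protein, found in is_matched.items() if found is None]
if missing:
    raise ValueError("Invalid seed protein(s) {} in {}.".format(missing, file_path))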
@@ -87,8 +93,6 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False,
             direct_drugs.add(edge.target())
         elif g.vertex_properties["type"][edge.source()] == d_type and edge.target() in seed_ids:
             direct_drugs.add(edge.source())
-    for drug in direct_drugs:
-        print(int(drug))
     for edge in g.edges():
         if g.edge_properties["type"][edge] == 'drug-protein':
             if g.vertex_properties["type"][edge.target()] == d_type:
@@ -113,6 +117,17 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=False,
     for edge in deleted_edges:
         g.remove_edge(edge)
     g.set_fast_edge_removal(fast=False)
+    print("Drugs")
+    print(drug_ids)
+    print("Vertices")
+    vertices = 0
+    for _ in g.vertices():
+        vertices += 1
+    print(f'\t{vertices}')
+    print("Edges")
+    edges = 0
+    for _ in g.edges():
+        edges += 1
+    print(f'\t{edges}')
     # Return the graph and the indices of the seed_ids and the seeds.
     return g, seed_ids, drug_ids
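The new counting loops walk every vertex and edge just to print totals; graph-tool already exposes these counts directly (num_vertices() is even used earlier in this function), so the same debug output is available in O(1):

print("Vertices")
print(f'\t{g.num_vertices()}')
print("Edges")
print(f'\t{g.num_edges()}')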
@@ -23,6 +23,7 @@ def scores_to_results(
     else:
         candidates = [(node, scores[node]) for node in range(g.num_vertices()) if scores[node] > 0 and node not in set(seed_ids)]
     best_candidates = [item[0] for item in sorted(candidates, key=lambda item: item[1], reverse=True)[:result_size]]
+    print(f'Candidate list length: {len(best_candidates)}')
     # Concatenate best result candidates with seeds and compute induced subgraph.
     # Since the result size filters out nodes, the result network is no longer complete.
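Since only the top result_size scores survive, the full sort can be swapped for a partial selection; a sketch with heapq that returns the same nodes up to tie ordering:

import heapq

# Top-k selection without sorting the entire candidate list.
best_candidates = [node for node, score in
                   heapq.nlargest(result_size, candidates, key=lambda item: item[1])]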
@@ -82,6 +83,7 @@ def scores_to_results(
     for edge in edges:
         if ((edge.source(), edge.target()) not in returned_edges) or ((edge.target(), edge.source()) not in returned_edges):
             returned_edges.add((edge.source(), edge.target()))
+    print(f'Returned nodes number: {len(returned_nodes)}')
     subgraph = {
         "nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes],
         "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges],