Skip to content
Snippets Groups Projects
Commit 59d8febe authored by AndiMajore's avatar AndiMajore
Browse files

small update but not all fixed

Former-commit-id: 9a544d3f4e0d6988be142678c41e287e4e669685 [formerly a54f02195d5fcb8eaae8a961cd91e5dc5a2305d5]
Former-commit-id: 7afc43bd1da646344f8fea1e0e6a3702c4995c72
parent d0654e51
No related branches found
No related tags found
No related merge requests found
...@@ -27,7 +27,6 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf ...@@ -27,7 +27,6 @@ COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/ COPY ./python_nedrex/ /usr/src/drugstone/python_nedrex/
RUN pip install /usr/src/drugstone/python_nedrex/ RUN pip install /usr/src/drugstone/python_nedrex/
RUN mkdir store
COPY . /usr/src/drugstone/ COPY . /usr/src/drugstone/
......
...@@ -14,7 +14,6 @@ services: ...@@ -14,7 +14,6 @@ services:
volumes: volumes:
- drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations - drugstone_db_schema_volume:/usr/src/drugstone/drugstone/migrations
- drugstone_data_volume:/usr/src/drugstone/data - drugstone_data_volume:/usr/src/drugstone/data
- drugstone_store_volume:/usr/src/drugstone/store
ports: ports:
- 8001:8000 - 8001:8000
networks: networks:
...@@ -102,5 +101,4 @@ volumes: ...@@ -102,5 +101,4 @@ volumes:
drugstone_db_schema_volume: drugstone_db_schema_volume:
# external: true # external: true
drugstone_data_volume: drugstone_data_volume:
# external: true # external: true
drugstone_store_volume: \ No newline at end of file
\ No newline at end of file
...@@ -198,7 +198,7 @@ class NedrexImporter: ...@@ -198,7 +198,7 @@ class NedrexImporter:
e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein) e = models.ProteinDrugInteraction(pdi_dataset=dataset, drug=drug, protein=protein)
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.add(e) bulk.add(e)
for source in edge['databases']: for source in edge['dataSources']:
bulk.add(models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein)) bulk.add(models.ProteinDrugInteraction(pdi_dataset=source_datasets[source], drug=drug, protein=protein))
except KeyError: except KeyError:
pass pass
...@@ -237,7 +237,7 @@ class NedrexImporter: ...@@ -237,7 +237,7 @@ class NedrexImporter:
e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2) e = models.ProteinProteinInteraction(ppi_dataset=dataset, from_protein=protein1, to_protein=protein2)
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.append(e) bulk.append(e)
for source in edge['assertedBy']: for source in edge['dataSources']:
bulk.append( bulk.append(
models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1, models.ProteinProteinInteraction(ppi_dataset=source_datasets[source], from_protein=protein1,
to_protein=protein2)) to_protein=protein2))
...@@ -268,7 +268,7 @@ class NedrexImporter: ...@@ -268,7 +268,7 @@ class NedrexImporter:
score=edge['score']) score=edge['score'])
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.add(e) bulk.add(e)
for source in edge['assertedBy']: for source in edge['dataSources']:
bulk.add( bulk.add(
models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder, models.ProteinDisorderAssociation(pdis_dataset=source_datasets[source], protein=protein, disorder=disorder,
score=edge['score'])) score=edge['score']))
...@@ -298,7 +298,7 @@ class NedrexImporter: ...@@ -298,7 +298,7 @@ class NedrexImporter:
e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder) e = models.DrugDisorderIndication(drdi_dataset=dataset, drug=drug, disorder=disorder)
if not update or e.__hash__() not in existing: if not update or e.__hash__() not in existing:
bulk.add(e) bulk.add(e)
for source in edge['assertedBy']: for source in edge['dataSources']:
bulk.add( bulk.add(
models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder)) models.DrugDisorderIndication(drdi_dataset=source_datasets[source], drug=drug, disorder=disorder))
except KeyError: except KeyError:
......
from requests.exceptions import RetryError
from drugstone import models from drugstone import models
from python_nedrex.static import get_metadata from python_nedrex.static import get_metadata
...@@ -23,28 +25,46 @@ def get_ppi_apid(): ...@@ -23,28 +25,46 @@ def get_ppi_apid():
def get_ppi_nedrex_biogrid(url):
    """Return the 'BioGRID' PPI dataset entry for ``url``, creating it if absent.

    The version is the BioGRID date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['biogrid']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PPIDataset.objects.get_or_create(
        name='BioGRID',
        link=url,
        version=version,
    )
    return dataset
def get_ppi_nedrex_iid(url):
    """Return the 'IID' PPI dataset entry for ``url``, creating it if absent.

    The version is the IID date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['iid']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PPIDataset.objects.get_or_create(
        name='IID',
        link=url,
        version=version,
    )
    return dataset
def get_ppi_nedrex_intact(url):
    """Return the 'IntAct' PPI dataset entry for ``url``, creating it if absent.

    The version is the IntAct date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['intact']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PPIDataset.objects.get_or_create(
        name='IntAct',
        link=url,
        version=version,
    )
    return dataset
...@@ -59,37 +79,61 @@ def get_ppi_biogrid(): ...@@ -59,37 +79,61 @@ def get_ppi_biogrid():
def get_drug_target_nedrex(url):
    """Return the 'NeDRex' protein-drug dataset entry for ``url``, creating it if absent.

    The version is the overall NeDRex metadata version. If the metadata
    request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        # No trailing comma here: it would turn the version into a 1-tuple.
        version = get_metadata()['version']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PDIDataset.objects.get_or_create(
        name='NeDRex',
        link=url,
        version=version,
    )
    return dataset
def get_ppi_nedrex(url):
    """Return the 'NeDRex' PPI dataset entry for ``url``, creating it if absent.

    The version is the overall NeDRex metadata version. If the metadata
    request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        # No trailing comma here: it would turn the version into a 1-tuple.
        version = get_metadata()['version']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PPIDataset.objects.get_or_create(
        name='NeDRex',
        link=url,
        version=version,
    )
    return dataset
def get_protein_disorder_nedrex(url):
    """Return the 'NeDRex' protein-disorder dataset entry for ``url``, creating it if absent.

    The version is the overall NeDRex metadata version. If the metadata
    request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        # No trailing comma here: it would turn the version into a 1-tuple.
        version = get_metadata()['version']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PDisDataset.objects.get_or_create(
        name='NeDRex',
        link=url,
        version=version,
    )
    return dataset
def get_drug_disorder_nedrex(url):
    """Return the 'NeDRex' drug-disorder dataset entry for ``url``, creating it if absent.

    The version is the overall NeDRex metadata version. If the metadata
    request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        # No trailing comma here: it would turn the version into a 1-tuple.
        version = get_metadata()['version']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.DrDiDataset.objects.get_or_create(
        name='NeDRex',
        link=url,
        version=version,
    )
    return dataset
...@@ -138,62 +182,103 @@ def get_drug_disorder_drugbank(): ...@@ -138,62 +182,103 @@ def get_drug_disorder_drugbank():
) )
return dataset return dataset
def get_today_version():
    """Return a placeholder version string built from today's local date.

    The format is ``'<year>-<month>-<day>_temp'`` with month and day not
    zero-padded (e.g. ``'2022-7-5_temp'``).
    """
    import datetime

    today = datetime.date.today()
    return f'{today.year}-{today.month}-{today.day}_temp'
def get_dis_prot_nedrex_disgenet(url):
    """Return the 'DisGeNET' protein-disorder dataset entry for ``url``, creating it if absent.

    The version is the DisGeNET date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['disgenet']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PDisDataset.objects.get_or_create(
        name='DisGeNET',
        link=url,
        version=version,
    )
    return dataset
def get_dis_prot_nedrex_omim(url):
    """Return the 'OMIM' protein-disorder dataset entry for ``url``, creating it if absent.

    The version is the OMIM date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['omim']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PDisDataset.objects.get_or_create(
        name='OMIM',
        link=url,
        version=version,
    )
    return dataset
def get_drdis_nedrex_drugcentral(url):
    """Return the 'Drug Central' drug-disorder dataset entry for ``url``, creating it if absent.

    The version is the ``drug_central`` date reported by the NeDRex metadata.
    If the metadata request fails with ``RetryError``, a dated placeholder
    from ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['drug_central']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.DrDiDataset.objects.get_or_create(
        name='Drug Central',
        link=url,
        version=version,
    )
    return dataset
def get_drdis_nedrex_ctd(url):
    """Return the 'CTD' drug-disorder dataset entry for ``url``, creating it if absent.

    The version is the CTD date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['ctd']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.DrDiDataset.objects.get_or_create(
        name='CTD',
        link=url,
        version=version,
    )
    return dataset
def get_pdr_nedrex_drugcentral(url):
    """Return the 'Drug Central' protein-drug dataset entry for ``url``, creating it if absent.

    The version is the ``drug_central`` date reported by the NeDRex metadata.
    If the metadata request fails with ``RetryError``, a dated placeholder
    from ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['drug_central']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PDIDataset.objects.get_or_create(
        name='Drug Central',
        link=url,
        version=version,
    )
    return dataset
def get_pdr_nedrex_drugbank(url):
    """Return the 'DrugBank' protein-drug dataset entry for ``url``, creating it if absent.

    The version is the DrugBank date reported by the NeDRex metadata. If the
    metadata request fails with ``RetryError``, a dated placeholder from
    ``get_today_version()`` is used instead.
    """
    try:
        version = get_metadata()['source_databases']['drugbank']['date']
    except RetryError:
        # Metadata endpoint unreachable: fall back to a temporary version tag.
        version = get_today_version()
    dataset, _ = models.PDIDataset.objects.get_or_create(
        name='DrugBank',
        link=url,
        version=version,
    )
    return dataset
def get_pdr_nedrex_datasets(url):
    """Map source keys to their protein-drug dataset entries for ``url``.

    Keys are the lowercase identifiers 'drugbank' and 'drugcentral'.
    """
    # presumably these keys match the edge 'dataSources' values used by the
    # importer to index this mapping; verify against the NeDRex API.
    return {
        'drugbank': get_pdr_nedrex_drugbank(url),
        'drugcentral': get_pdr_nedrex_drugcentral(url),
    }
def get_drdis_nedrex_datasets(url):
    """Map source keys to their drug-disorder dataset entries for ``url``.

    Keys are the lowercase identifiers 'ctd' and 'drugcentral'.
    """
    # presumably these keys match the edge 'dataSources' values used by the
    # importer to index this mapping; verify against the NeDRex API.
    return {
        'ctd': get_drdis_nedrex_ctd(url),
        'drugcentral': get_drdis_nedrex_drugcentral(url),
    }
def get_ppi_nedrex_datasets(url):
    """Map source keys to their PPI dataset entries for ``url``.

    Keys are the lowercase identifiers 'biogrid', 'iid' and 'intact'.
    """
    return {
        'biogrid': get_ppi_nedrex_biogrid(url),
        'iid': get_ppi_nedrex_iid(url),
        'intact': get_ppi_nedrex_intact(url),
    }
......
...@@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone ...@@ -4,7 +4,7 @@ python3 manage.py makemigrations drugstone
python3 manage.py migrate python3 manage.py migrate
python3 manage.py createfixtures python3 manage.py createfixtures
python3 manage.py cleanuptasks python3 manage.py cleanuptasks
python3 manage.py populate_db --update -a #python3 manage.py populate_db --update -a
#python3 manage.py make_graphs #python3 manage.py make_graphs
/usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf" /usr/bin/supervisord -c "/etc/supervisor/conf.d/supervisord.conf"
...@@ -45,6 +45,7 @@ def quick_task(task_hook: TaskHook): ...@@ -45,6 +45,7 @@ def quick_task(task_hook: TaskHook):
"result_size": 20, "result_size": 20,
"include_non_approved_drugs": True, "include_non_approved_drugs": True,
"include_indirect_drugs": False, "include_indirect_drugs": False,
"target":"drug"
}) })
tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result) tr_task_hook = TaskHook(parameters, task_hook.data_directory, progress, set_result)
......
...@@ -200,8 +200,6 @@ def trust_rank(task_hook: TaskHook): ...@@ -200,8 +200,6 @@ def trust_rank(task_hook: TaskHook):
task_hook.set_progress(0 / 4.0, "Parsing input.") task_hook.set_progress(0 / 4.0, "Parsing input.")
file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt") file_path = os.path.join(task_hook.data_directory, f"internal_{ppi_dataset['name']}_{pdi_dataset['name']}.gt")
g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target) g, seed_ids, drug_ids = read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs, include_non_approved_drugs, search_target)
print(seed_ids)
print(drug_ids)
task_hook.set_progress(1 / 4.0, "Computing edge weights.") task_hook.set_progress(1 / 4.0, "Computing edge weights.")
weights = edge_weights(g, hub_penalty, inverse=True) weights = edge_weights(g, hub_penalty, inverse=True)
......
...@@ -37,7 +37,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -37,7 +37,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
The graph indices for all drug nodes The graph indices for all drug nodes
""" """
# Read the graph. # Read the graph.
print(f"loading {file_path} for {target}")
g = gt.load_graph(file_path) g = gt.load_graph(file_path)
# g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms # g = gtt.extract_largest_component(gg, directed=False, prune=True) # this line is added since we need to work with the LCC of the graphs for all algorithms
...@@ -47,16 +47,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -47,16 +47,20 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
# Delete all nodes that are not contained in the selected datasets and have degrees higher than max_deg # Delete all nodes that are not contained in the selected datasets and have degrees higher than max_deg
deleted_nodes = [] deleted_nodes = []
for node in range(g.num_vertices()): for node in range(g.num_vertices()):
#Remove all unconnected nodes TODO probably already skip when creating .gt files
if g.vertex(node).out_degree() == 0 and target == 'drug':
deleted_nodes.append(node)
# if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg: # if not g.vertex_properties["name"][node] in set(seeds) and g.vertex(node).out_degree() > max_deg:
if not g.vertex_properties[node_name_attribute][node] in set(seeds) and g.vertex(node).out_degree() > max_deg: elif not g.vertex_properties[node_name_attribute][node] in set(seeds) and (g.vertex(node).out_degree() > max_deg):
deleted_nodes.append(node) deleted_nodes.append(node)
# remove all drugs from graph if we are not looking for drugs # remove all drugs from graph if we are not looking for drugs
elif target != 'drug' and g.vertex_properties["type"][node] == d_type: elif target != 'drug' and g.vertex_properties["type"][node] == d_type:
deleted_nodes.append(node) deleted_nodes.append(node)
g.remove_vertex(deleted_nodes, fast=True) g.remove_vertex(deleted_nodes, fast=True)
# Retrieve internal IDs of seed_ids and viral_protein_ids. # Retrieve internal IDs of seed_ids
seeds = set(seeds) seeds = set(seeds)
print(seeds)
seed_ids = [] seed_ids = []
drug_ids = [] drug_ids = []
is_matched = {protein: False for protein in seeds} is_matched = {protein: False for protein in seeds}
...@@ -64,7 +68,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -64,7 +68,7 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
node_type = g.vertex_properties["type"][node] node_type = g.vertex_properties["type"][node]
if g.vertex_properties[node_name_attribute][node] in seeds: if g.vertex_properties[node_name_attribute][node] in seeds:
seed_ids.append(node) seed_ids.append(node)
is_matched[g.vertex_properties[node_name_attribute][node]] = True is_matched[g.vertex_properties[node_name_attribute][node]] = node
if node_type == d_type: if node_type == d_type:
if include_non_approved_drugs: if include_non_approved_drugs:
drug_ids.append(node) drug_ids.append(node)
...@@ -74,9 +78,11 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -74,9 +78,11 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
drug_ids.append(node) drug_ids.append(node)
# Check that all seed seeds have been matched and throw error, otherwise. # Check that all seed seeds have been matched and throw error, otherwise.
# print(deleted_nodes)
print(seed_ids)
for protein, found in is_matched.items(): for protein, found in is_matched.items():
if not found: if not found:
raise ValueError("Invaliddd seed protein {}. No node named {} in {}.".format(protein, protein, file_path)) raise ValueError("Invalid seed protein {}. No node named {} in {}.".format(protein, protein, file_path))
# Delete edges that should be ignored or are not contained in the selected dataset. # Delete edges that should be ignored or are not contained in the selected dataset.
deleted_edges = [] deleted_edges = []
...@@ -87,8 +93,6 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -87,8 +93,6 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
direct_drugs.add(edge.target()) direct_drugs.add(edge.target())
elif g.vertex_properties["type"][edge.source()] == d_type and edge.target() in seed_ids: elif g.vertex_properties["type"][edge.source()] == d_type and edge.target() in seed_ids:
direct_drugs.add(edge.source()) direct_drugs.add(edge.source())
for drug in direct_drugs:
print(int(drug))
for edge in g.edges(): for edge in g.edges():
if g.edge_properties["type"][edge] == 'drug-protein': if g.edge_properties["type"][edge] == 'drug-protein':
if g.vertex_properties["type"][edge.target()] == d_type: if g.vertex_properties["type"][edge.target()] == d_type:
...@@ -113,6 +117,17 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals ...@@ -113,6 +117,17 @@ def read_graph_tool_graph(file_path, seeds, max_deg, include_indirect_drugs=Fals
for edge in deleted_edges: for edge in deleted_edges:
g.remove_edge(edge) g.remove_edge(edge)
g.set_fast_edge_removal(fast=False) g.set_fast_edge_removal(fast=False)
print("Drugs")
print(drug_ids)
print("Vertices")
vertices = 0
for _ in g.vertices():
vertices += 1
print(f'\t{vertices}')
print("Edges")
edges = 0
for _ in g.edges():
edges+=1
print(f'\t{edges}')
# Return the graph and the indices of the seed_ids and the seeds. # Return the graph and the indices of the seed_ids and the seeds.
return g, seed_ids, drug_ids return g, seed_ids, drug_ids
...@@ -23,6 +23,7 @@ def scores_to_results( ...@@ -23,6 +23,7 @@ def scores_to_results(
else: else:
candidates = [(node, scores[node]) for node in range(g.num_vertices()) if scores[node] > 0 and node not in set(seed_ids)] candidates = [(node, scores[node]) for node in range(g.num_vertices()) if scores[node] > 0 and node not in set(seed_ids)]
best_candidates = [item[0] for item in sorted(candidates, key=lambda item: item[1], reverse=True)[:result_size]] best_candidates = [item[0] for item in sorted(candidates, key=lambda item: item[1], reverse=True)[:result_size]]
print(f'Candidate list length: {len(best_candidates)}')
# Concatenate best result candidates with seeds and compute induced subgraph. # Concatenate best result candidates with seeds and compute induced subgraph.
# since the result size filters out nodes, the result network is not complete anymore. # since the result size filters out nodes, the result network is not complete anymore.
...@@ -82,6 +83,7 @@ def scores_to_results( ...@@ -82,6 +83,7 @@ def scores_to_results(
for edge in edges: for edge in edges:
if ((edge.source(), edge.target()) not in returned_edges) or ((edge.target(), edge.source()) not in returned_edges): if ((edge.source(), edge.target()) not in returned_edges) or ((edge.target(), edge.source()) not in returned_edges):
returned_edges.add((edge.source(), edge.target())) returned_edges.add((edge.source(), edge.target()))
print(f'Returned nodes number: {len(returned_nodes)}')
subgraph = { subgraph = {
"nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes], "nodes": [g.vertex_properties[node_name_attribute][node] for node in returned_nodes],
"edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges], "edges": [{"from": g.vertex_properties[node_name_attribute][source], "to": g.vertex_properties[node_name_attribute][target]} for source, target in returned_edges],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment