Skip to content
Snippets Groups Projects
Commit 32f87d22 authored by Kurtz, Prof. Dr. Stefan
Browse files

Yersinia data set added

parent 6d1cc041
No related branches found
No related tags found
No related merge requests found
......@@ -4,5 +4,8 @@ test:testsuite/test_rep.sh
@testsuite/test_rep.sh testset1
@echo "all tests passed"
test_y:
src/phybema.py --tools mash andi dnadiff -- testdata/Yersinia/
clean:
@${RM} -r temp out
......@@ -25,6 +25,7 @@ def run_prog(progname,call_list,fixed_out_file_name,progoutfilepath):
sys.stderr.write("# starting program {}\n".format(progname))
start_time = datetime.now()
sys.stderr.write(' '.join(call_list))
try:
output = subprocess.run(call_list,stdout=subprocess.PIPE,
stderr=subprocess.PIPE, universal_newlines=True)
......
......@@ -7,13 +7,13 @@ to create a datatuplelist with the datasets and reftrees.
author: Birgitta Päuker
'''
import argparse, os, sys
import argparse, os, sys, re, subprocess
from argparse import RawTextHelpFormatter
from pathlib import Path
from estimators import DistanceEstimator, estimator_choices
from root_dir import phybema_root_dir
FASTA_SUFFIXES = [".fasta",".faa",".fna"]
FASTA_SUFFIXES = [".fasta",".faa",".fna",".fasta.gz"]
REFTREE_SUFFIXES = [".nh",".tre"]
# Function to parse the phybema options
......@@ -92,8 +92,12 @@ def create_datatuplelist(datasetpaths):
exit(1)
# Create list of files with FASTA_SUFFIXES suffix
datafilepaths = [f.path for f in os.scandir(datasetpath) if Path(f).is_file
and os.path.splitext(f)[1] in FASTA_SUFFIXES]
datafilepaths = list()
for f in os.scandir(datasetpath):
if Path(f).is_file:
for suffix in FASTA_SUFFIXES:
if re.search(r'{}$'.format(suffix),f.path):
datafilepaths.append(f.path)
# Check if a datafile was found
if not datafilepaths:
......@@ -113,6 +117,21 @@ def create_datatuplelist(datasetpaths):
sys.stderr.write("{}: The found datafile {} is empty.\n"
.format(sys.argv[0], datafile))
exit(1)
mo = re.search(r'(.*)\.gz$',datafile)
if mo:
uncompressed_file = "{}".format(mo.group(1))
if not os.path.isfile(uncompressed_file):
call_list = ['gzip','-k','-d',datafile]
sys.stderr.write('{}\n'.format(' '.join(call_list)))
try:
output = subprocess.run(call_list,stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True)
except:
sys.stderr.write('{}: executing {} failed\n'
.format(sys.argv[0],' '.join(call_list)))
exit(1)
datafile = uncompressed_file
# Create list of files with REFTREE_SUFFIXES suffix
reftreefilepaths = [f.path for f in os.scandir(datasetpath) if
......
sequence.fasta
File added
(NC_008150:0.00006,((NC_008149:0.00006,NC_004088:0.00003):0.00003,
(NC_005810:0.00021,((AAKT020000:0.00463,NC_006155:0.00213):0.00336,
Pestis_F:0.00008):0.00003):0.00003):0.00003,
NC_003143:0.00003);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment