diff --git a/Makefile b/Makefile index 8a52da20b7796524e156f63922ed3a4d6f0034f8..d02bedf93dce0bf17a9e3d3b1d0c40791a24a2ef 100644 --- a/Makefile +++ b/Makefile @@ -4,5 +4,8 @@ test:testsuite/test_rep.sh @testsuite/test_rep.sh testset1 @echo "all tests passed" +test_y: + src/phybema.py --tools mash andi dnadiff -- testdata/Yersinia/ + clean: @${RM} -r temp out diff --git a/src/callProg.py b/src/callProg.py index 3aff7d22cdfd79015e93f7d8999032990d4d9494..967c0cb77a6a04ee364f22c667b592252c9ff531 100644 --- a/src/callProg.py +++ b/src/callProg.py @@ -25,6 +25,7 @@ def run_prog(progname,call_list,fixed_out_file_name,progoutfilepath): sys.stderr.write("# starting program {}\n".format(progname)) start_time = datetime.now() + sys.stderr.write(' '.join(call_list)) try: output = subprocess.run(call_list,stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) diff --git a/src/parser.py b/src/parser.py index 71fd9cca083b31330bd56462ebc23a037b5a0afe..16163eb47b3ec3255705c1f66c3fafe6f04d1587 100644 --- a/src/parser.py +++ b/src/parser.py @@ -7,13 +7,13 @@ to create a datatuplelist with the datasets and reftrees. author: Birgitta Päuker ''' -import argparse, os, sys +import argparse, os, sys, re, subprocess from argparse import RawTextHelpFormatter from pathlib import Path from estimators import DistanceEstimator, estimator_choices from root_dir import phybema_root_dir -FASTA_SUFFIXES = [".fasta",".faa",".fna"] +FASTA_SUFFIXES = [".fasta",".faa",".fna",".fasta.gz"] REFTREE_SUFFIXES = [".nh",".tre"] # Function to parse the phybema options @@ -92,8 +92,12 @@ def create_datatuplelist(datasetpaths): exit(1) # Create list of files with FASTA_SUFFIXES suffix - datafilepaths = [f.path for f in os.scandir(datasetpath) if Path(f).is_file - and os.path.splitext(f)[1] in FASTA_SUFFIXES] + datafilepaths = list() + for f in os.scandir(datasetpath): + if Path(f).is_file: + for suffix in FASTA_SUFFIXES: + if re.search(r'{}$'.format(suffix),f.path): + datafilepaths.append(f.path) # Check if a datafile was found if not datafilepaths: @@ -113,6 +117,21 @@ def create_datatuplelist(datasetpaths): sys.stderr.write("{}: The found datafile {} is empty.\n" .format(sys.argv[0], datafile)) exit(1) + mo = re.search(r'(.*)\.gz$',datafile) + if mo: + uncompressed_file = "{}".format(mo.group(1)) + if not os.path.isfile(uncompressed_file): + call_list = ['gzip','-k','-d',datafile] + sys.stderr.write('{}\n'.format(' '.join(call_list))) + try: + output = subprocess.run(call_list,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + except: + sys.stderr.write('{}: executing {} failed\n' + .format(sys.argv[0],' '.join(call_list))) + exit(1) + datafile = uncompressed_file # Create list of files with REFTREE_SUFFIXES suffix reftreefilepaths = [f.path for f in os.scandir(datasetpath) if diff --git a/testdata/Yersinia/.gitignore b/testdata/Yersinia/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..83836288e2db3c96f2e4892241bc2a4a262ccc1b --- /dev/null +++ b/testdata/Yersinia/.gitignore @@ -0,0 +1 @@ +sequence.fasta diff --git a/testdata/Yersinia/sequence.fasta.gz b/testdata/Yersinia/sequence.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..facbeef9e71b92ea380bb59a863ed6bc771dbcd7 Binary files /dev/null and b/testdata/Yersinia/sequence.fasta.gz differ diff --git a/testdata/Yersinia/tree.nh b/testdata/Yersinia/tree.nh new file mode 100644 index 0000000000000000000000000000000000000000..6700166e0e58358bee8f9704bda928ea5dcff18d --- /dev/null +++ b/testdata/Yersinia/tree.nh @@ -0,0 +1,4 @@ +(NC_008150:0.00006,((NC_008149:0.00006,NC_004088:0.00003):0.00003, +(NC_005810:0.00021,((AAKT020000:0.00463,NC_006155:0.00213):0.00336, +Pestis_F:0.00008):0.00003):0.00003):0.00003, +NC_003143:0.00003);