- For the 48 kHz model [3], use `--backbone ncsnpp_48k --n_fft 1534 --hop_length 384 --spec_factor 0.065 --spec_abs_exponent 0.667 --sigma-min 0.1 --sigma-max 1.0 --theta 2.0`
- For the 48 kHz model [3], use `--backbone ncsnpp_48k --spec_factor 0.065 --spec_abs_exponent 0.667 --sigma-min 0.1 --sigma-max 1.0 --theta 2.0`
- Our Interspeech paper [1] uses `--backbone dcunet`. When using this backbone, you must also pass `--n_fft 512` for it to work.
- Also note that the default spectrogram transformation parameters in this repository differ slightly from those listed in the first (Interspeech) paper (`--spec_factor 0.15` rather than `--spec_factor 0.333`); we have found that the repository default of 0.15 generally performs better for both models [1] and [2].
@@ -18,13 +18,13 @@ class ScoreModel(pl.LightningModule):
def add_argparse_args(parser):
    """Register this model's command-line options on ``parser``.

    The options ``--lr``, ``--ema_decay``, ``--t_eps``, ``--num_eval_files``
    and ``--loss_type`` are added to ``parser`` in place.

    Args:
        parser: An ``argparse``-style parser, i.e. any object exposing
            ``add_argument``.

    NOTE(review): the visible lines contain no ``return`` statement, so none
    is added here; confirm whether callers expect the parser to be returned.
    """
    parser.add_argument("--lr", type=float, default=1e-4, help="The learning rate (1e-4 by default)")
    parser.add_argument("--ema_decay", type=float, default=0.999, help="The parameter EMA decay constant (0.999 by default)")
    # ``--t_eps`` must be registered exactly once: argparse raises
    # ArgumentError on a repeated option string. The dropped duplicate had
    # the same type and default and differed only in its help text.
    parser.add_argument("--t_eps", type=float, default=0.03, help="The minimum process time (0.03 by default)")
    parser.add_argument("--num_eval_files", type=int, default=20, help="Number of files for speech enhancement performance evaluation during training. Pass 0 to turn off (no checkpoints based on evaluation metrics will be generated).")
    parser.add_argument("--loss_type", type=str, default="mse", choices=("mse", "mae"), help="The type of loss function to use.")