#!/bin/bash

# 1c is as 1b, but uses a more modern TDNN configuration.

# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp exp/nnet3_cleaned/tdnn_1c_sp
# System                       tdnn_sp    tdnn_1c_sp
# WER on dev(fglarge)             4.52          4.20
# WER on dev(tglarge)             4.80          4.37
# WER on dev(tgmed)               6.02          5.31
# WER on dev(tgsmall)             6.80          5.86
# WER on dev_other(fglarge)      12.54         12.55
# WER on dev_other(tglarge)      13.16         13.00
# WER on dev_other(tgmed)        15.51         14.98
# WER on dev_other(tgsmall)      17.12         15.88
# WER on test(fglarge)            5.00          4.91
# WER on test(tglarge)            5.22          4.99
# WER on test(tgmed)              6.40          5.93
# WER on test(tgsmall)            7.14          6.49
# WER on test_other(fglarge)     12.56         12.94
# WER on test_other(tglarge)     13.04         13.38
# WER on test_other(tgmed)       15.58         15.11
# WER on test_other(tgsmall)     16.88         16.28
# Final train prob              0.7180        0.8509
# Final valid prob              0.7003        0.8157
# Final train prob (logLL)     -0.9483       -0.4294
# Final valid prob (logLL)     -0.9963       -0.5662
# Num-parameters              19268504      18391704

# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_1c_sp
# exp/nnet3_cleaned/tdnn_1c_sp: num-iters=1088 nj=3..16 num-params=18.4M dim=40+100->5784 combine=-0.43->-0.43 (over 4) loglike:train/valid[723,1087,combined]=(-0.48,-0.43,-0.43/-0.58,-0.57,-0.57) accuracy:train/valid[723,1087,combined]=(0.840,0.854,0.851/0.811,0.816,0.816)

# this is the standard "tdnn" system, built in nnet3; it's what we used to
# call multi-splice.

# without cleanup:
# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &


# At this script level we don't support running without a GPU, as it would be painfully slow.
# If you want to run without a GPU you'd have to call train_tdnn.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.

# First the options that are passed through to run_ivector_common.sh
# (some of which are also used in this script directly).
stage=0
decode_nj=30
train_set=train_960_cleaned
gmm=tri6b_cleaned  # this is the source gmm-dir for the data-type of interest; it
                   # should have alignments for the specified training data.
nnet3_affix=_cleaned

# Options which are not passed through to run_ivector_common.sh
affix=
train_stage=-10
common_egs_dir=
reporting_email=
remove_egs=true
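# affix:           optional tag appended to the experiment directory name (see $dir below).
# train_stage:     passed to train_dnn.py --stage; useful for resuming an interrupted run.
# common_egs_dir:  if set, training examples (egs) from a previous run are reused via --egs.dir.
# reporting_email: if set, training progress reports are mailed to this address.
# remove_egs:      if true, the (large) egs directory is cleaned up after training.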

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
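# utils/parse_options.sh above lets any of the variables defined before it be
# overridden from the command line, e.g. (hypothetical invocation; adjust the
# path to wherever this script is saved):
#   local/nnet3/tuning/run_tdnn_1c.sh --stage 12 --train-stage 50 --decode-nj 20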


if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

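# run_ivector_common.sh is expected to prepare the inputs used below: the
# speed-perturbed high-resolution (hires) MFCC features, alignments of the
# perturbed data, and the online i-vectors (cf. $train_data_dir, $ali_dir and
# $train_ivector_dir).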
local/nnet3/run_ivector_common.sh --stage $stage \
                                  --train-set $train_set \
                                  --gmm $gmm \
                                  --nnet3-affix "$nnet3_affix" || exit 1;


gmm_dir=exp/${gmm}
graph_dir=$gmm_dir/graph_tgsmall
ali_dir=exp/${gmm}_ali_${train_set}_sp
dir=exp/nnet3${nnet3_affix}/tdnn${affix:+_$affix}_sp
train_data_dir=data/${train_set}_sp_hires
train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires


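# Sanity check: all inputs produced by the earlier stages must exist before we continue.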
for f in $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \
    $graph_dir/HCLG.fst $ali_dir/ali.1.gz $gmm_dir/final.mdl; do
  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done

if [ $stage -le 11 ]; then
  echo "$0: creating neural net configs";

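  # num_targets is the number of context-dependent states (pdfs) in the GMM's
  # decision tree; it becomes the dimension of the network's output layer.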
  num_targets=$(tree-info $ali_dir/tree |grep num-pdfs|awk '{print $2}')

  # NOTE: the *_opts variables below are referenced in network.xconfig but are
  # not otherwise defined in this script; they are placeholders for per-layer
  # options (e.g. l2-regularize or dropout settings, as used in other Kaldi
  # TDNN-F recipes).  They are defined as empty here so the layers fall back to
  # the xconfig defaults.
  affine_opts=
  tdnnf_opts=
  linear_opts=
  prefinal_opts=

  mkdir -p $dir/configs
  cat <<EOF > $dir/configs/network.xconfig
  input dim=100 name=ivector
  input dim=40 name=input
  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

  relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  linear-component name=prefinal-l dim=256 $linear_opts

  prefinal-layer name=prefinal input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
  output-layer name=output input=prefinal dim=$num_targets max-change=1.5
EOF
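  # Compile the high-level xconfig description above into the actual nnet3
  # config files used to initialize the network.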
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
    --config-dir $dir/configs || exit 1;
fi

if [ $stage -le 12 ]; then
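  # On the JHU CLSP grid, spread the egs directory over several file systems;
  # this has no effect elsewhere.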
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi

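  # Train a plain (cross-entropy) nnet3 DNN: 4 epochs, with the number of
  # parallel jobs ramping from 3 to 16 and the effective learning rate decaying
  # from 0.0017 to 0.00017 over the course of training.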
  steps/nnet3/train_dnn.py --stage=$train_stage \
    --cmd="$decode_cmd" \
    --feat.online-ivector-dir $train_ivector_dir \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --trainer.num-epochs 4 \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.0017 \
    --trainer.optimization.final-effective-lrate 0.00017 \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs $remove_egs \
    --cleanup.preserve-model-interval 100 \
    --feat-dir=$train_data_dir \
    --ali-dir $ali_dir \
    --lang data/lang \
    --reporting.email="$reporting_email" \
    --dir=$dir || exit 1;

fi

if [ $stage -le 13 ]; then
  # this does offline decoding that should give about the same results as the
  # real online decoding (the one with --per-utt true)
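  # Each test set is decoded with the small trigram (tgsmall) graph and the
  # lattices are then rescored with the tgmed, tglarge and fglarge LMs; the
  # four sets run in parallel background subshells, with failures collected
  # via $dir/.error.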
  rm $dir/.error 2>/dev/null || true
  for test in test_clean test_other dev_clean dev_other; do
    (
      steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
        ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
      steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
        data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
      steps/lmrescore_const_arpa.sh \
        --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
        data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
      steps/lmrescore_const_arpa.sh \
        --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
        data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
    ) || touch $dir/.error &
  done
  wait
  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
fi

exit 0;