
Commit 6c7438f

Merge remote-tracking branch 'upstream/master' into sync-pybind11-with-master

2 parents 8edfa9d + 8e2bbd2

24 files changed: +548 -43 lines

egs/librispeech/s5/RESULTS

Lines changed: 0 additions & 19 deletions
@@ -465,25 +465,6 @@
 %WER 14.78 [ 7737 / 52343, 807 ins, 1115 del, 5815 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgmed/wer_15_0.0
 %WER 16.28 [ 8521 / 52343, 843 ins, 1258 del, 6420 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0
 
-# Results with nnet3 tdnn with new configs, a.k.a. xconfig
-# local/nnet3/run_tdnn.sh (linked to local/nnet3/tuning/run_tdnn_1b.sh)
-%WER 4.60 [ 2502 / 54402, 324 ins, 286 del, 1892 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_clean_fglarge/wer_13_1.0
-%WER 4.80 [ 2612 / 54402, 350 ins, 285 del, 1977 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_clean_tglarge/wer_11_1.0
-%WER 5.97 [ 3248 / 54402, 460 ins, 310 del, 2478 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_clean_tgmed/wer_11_0.0
-%WER 6.66 [ 3625 / 54402, 479 ins, 392 del, 2754 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_clean_tgsmall/wer_11_0.0
-%WER 12.29 [ 6262 / 50948, 863 ins, 665 del, 4734 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_other_fglarge/wer_15_0.0
-%WER 12.89 [ 6565 / 50948, 773 ins, 853 del, 4939 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_other_tglarge/wer_14_0.5
-%WER 15.41 [ 7849 / 50948, 894 ins, 1083 del, 5872 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_other_tgmed/wer_15_0.0
-%WER 16.81 [ 8562 / 50948, 896 ins, 1215 del, 6451 sub ] exp/nnet3_cleaned/tdnn_sp/decode_dev_other_tgsmall/wer_14_0.0
-%WER 4.99 [ 2624 / 52576, 393 ins, 253 del, 1978 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_clean_fglarge/wer_13_0.5
-%WER 5.16 [ 2715 / 52576, 359 ins, 319 del, 2037 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_clean_tglarge/wer_12_1.0
-%WER 6.29 [ 3307 / 52576, 471 ins, 341 del, 2495 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_clean_tgmed/wer_12_0.0
-%WER 7.13 [ 3750 / 52576, 473 ins, 452 del, 2825 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_clean_tgsmall/wer_13_0.0
-%WER 12.73 [ 6665 / 52343, 894 ins, 711 del, 5060 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_fglarge/wer_14_0.0
-%WER 13.33 [ 6979 / 52343, 920 ins, 796 del, 5263 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tglarge/wer_14_0.0
-%WER 15.90 [ 8323 / 52343, 921 ins, 1126 del, 6276 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgmed/wer_13_0.0
-%WER 17.28 [ 9044 / 52343, 894 ins, 1372 del, 6778 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgsmall/wer_14_0.0
-
 # Results with nnet3 tdnn+sMBR
 # local/nnet3/run_tdnn_discriminative.sh
 # a subset of the full list of results (using the acoustic model obtained at the end of the training):
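
An aside on the format: each %WER line in this file is Kaldi's standard scoring summary, where the second field is the word error rate and the bracketed part breaks the errors into insertions, deletions, and substitutions over the reference word count. A minimal sketch of pulling those fields apart, in the same awk idiom the comparison script added below uses; the sample line is copied from the context above, and the field positions are the only assumption:

  # parse a "%WER ..." scoring line: fields are %WER(1) rate(2) [(3) errors(4) /(5) ref-words(6)
  line='%WER 14.78 [ 7737 / 52343, 807 ins, 1115 del, 5815 sub ] exp/nnet3_cleaned/tdnn_sp/decode_test_other_tgmed/wer_15_0.0'
  echo "$line" | awk '{gsub(",", "", $6); printf("WER %s%% (%d errors / %d words)\n", $2, $4, $6)}'
  # prints: WER 14.78% (7737 errors / 52343 words)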
Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+# this script is used for comparing decoding results between systems.
+# e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp
+# For use with discriminatively trained systems you specify the epochs after a colon:
+# for instance,
+# local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3}
+
+
+if [ $# == 0 ]; then
+  echo "Usage: $0: [--looped] [--online] <dir1> [<dir2> ... ]"
+  echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp"
+  echo "or (with epoch numbers for discriminative training):"
+  echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}"
+  exit 1
+fi
+
+echo "# $0 $*"
+
+include_looped=false
+if [ "$1" == "--looped" ]; then
+  include_looped=true
+  shift
+fi
+include_online=false
+if [ "$1" == "--online" ]; then
+  include_online=true
+  shift
+fi
+
+
+used_epochs=false
+
+# this function set_names is used to separate the epoch-related parts of the name
+# [for discriminative training] and the regular parts of the name.
+# If called with a colon-free directory name, like:
+# set_names exp/chain/tdnn_lstm1e_sp_bi_smbr
+# it will set dirname=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix=""
+# If called with something like:
+# set_names exp/chain/tdnn_d_sp_smbr:3
+# it will set dirname=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3"
+
+
+set_names() {
+  if [ $# != 1 ]; then
+    echo "compare_wer_general.sh: internal error"
+    exit 1  # exit the program
+  fi
+  dirname=$(echo $1 | cut -d: -f1)
+  epoch=$(echo $1 | cut -s -d: -f2)
+  if [ -z $epoch ]; then
+    epoch_infix=""
+  else
+    used_epochs=true
+    epoch_infix=_epoch${epoch}
+  fi
+}
+
+
+
+echo -n "# System                    "
+for x in $*; do   printf "% 10s" " $(basename $x)";   done
+echo
+
+strings=(
+  "# WER on dev(fglarge)       "
+  "# WER on dev(tglarge)       "
+  "# WER on dev(tgmed)         "
+  "# WER on dev(tgsmall)       "
+  "# WER on dev_other(fglarge) "
+  "# WER on dev_other(tglarge) "
+  "# WER on dev_other(tgmed)   "
+  "# WER on dev_other(tgsmall) "
+  "# WER on test(fglarge)      "
+  "# WER on test(tglarge)      "
+  "# WER on test(tgmed)        "
+  "# WER on test(tgsmall)      "
+  "# WER on test_other(fglarge)"
+  "# WER on test_other(tglarge)"
+  "# WER on test_other(tgmed)  "
+  "# WER on test_other(tgsmall)")
+
+for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+  echo -n "${strings[$n]}"
+  for x in $*; do
+    set_names $x  # sets $dirname and $epoch_infix
+    decode_names=(dev_clean_fglarge dev_clean_tglarge dev_clean_tgmed dev_clean_tgsmall dev_other_fglarge dev_other_tglarge dev_other_tgmed dev_other_tgsmall test_clean_fglarge test_clean_tglarge test_clean_tgmed test_clean_tgsmall test_other_fglarge test_other_tglarge test_other_tgmed test_other_tgsmall)
+
+    wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+    printf "% 10s" $wer
+  done
+  echo
+  if $include_looped; then
+    echo -n "# [looped:]                 "
+    for x in $*; do
+      set_names $x  # sets $dirname and $epoch_infix
+      wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+      printf "% 10s" $wer
+    done
+    echo
+  fi
+  if $include_online; then
+    echo -n "# [online:]                 "
+    for x in $*; do
+      set_names $x  # sets $dirname and $epoch_infix
+      wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}')
+      printf "% 10s" $wer
+    done
+    echo
+  fi
+done
+
+
+if $used_epochs; then
+  exit 0;  # the diagnostics aren't comparable between regular and discriminatively trained systems.
+fi
+
+
+echo -n "# Final train prob          "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep -v likelihood | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob          "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep -v likelihood | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final train prob (logLL)  "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.combined.log | grep -w likelihood | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (logLL)  "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.combined.log | grep -w likelihood | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Num-parameters            "
+for x in $*; do
+  num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}')
+  printf "% 10d" $num_params
+done
+echo
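
The set_names function above leans on cut's -s flag: without -s, a colon-free argument would be printed whole even when asking for field 2, but with -s cut prints nothing, so $epoch stays empty and no epoch infix is appended. A small self-contained sketch of that behavior (the directory names are illustrative only):

  # demo of the colon parsing used by set_names; safe to run anywhere
  for arg in exp/chain/tdnn_d_sp_smbr:3 exp/chain/tdnn_lstm1e_sp_bi_smbr; do
    dirname=$(echo $arg | cut -d: -f1)   # part before the colon (or the whole argument)
    epoch=$(echo $arg | cut -s -d: -f2)  # -s suppresses lines that contain no delimiter
    echo "arg=$arg -> dirname=$dirname epoch_infix=${epoch:+_epoch$epoch}"
  done
  # arg=exp/chain/tdnn_d_sp_smbr:3 -> dirname=exp/chain/tdnn_d_sp_smbr epoch_infix=_epoch3
  # arg=exp/chain/tdnn_lstm1e_sp_bi_smbr -> dirname=exp/chain/tdnn_lstm1e_sp_bi_smbr epoch_infix=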

egs/librispeech/s5/local/nnet3/tuning/run_tdnn_1b.sh

Lines changed: 28 additions & 0 deletions
@@ -2,6 +2,34 @@
 
 # 1b is as 1a but uses xconfigs.
 
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp
+# System                       tdnn_sp
+# WER on dev(fglarge)             4.52
+# WER on dev(tglarge)             4.80
+# WER on dev(tgmed)               6.02
+# WER on dev(tgsmall)             6.80
+# WER on dev_other(fglarge)      12.54
+# WER on dev_other(tglarge)      13.16
+# WER on dev_other(tgmed)        15.51
+# WER on dev_other(tgsmall)      17.12
+# WER on test(fglarge)            5.00
+# WER on test(tglarge)            5.22
+# WER on test(tgmed)              6.40
+# WER on test(tgsmall)            7.14
+# WER on test_other(fglarge)     12.56
+# WER on test_other(tglarge)     13.04
+# WER on test_other(tgmed)       15.58
+# WER on test_other(tgsmall)     16.88
+# Final train prob              0.7180
+# Final valid prob              0.7003
+# Final train prob (logLL)     -0.9483
+# Final valid prob (logLL)     -0.9963
+# Num-parameters              19268504
+
+
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_sp
+# exp/nnet3_cleaned/tdnn_sp/: num-iters=1088 nj=3..16 num-params=19.3M dim=40+100->5784 combine=-0.94->-0.93 (over 7) loglike:train/valid[723,1087,combined]=(-0.99,-0.95,-0.95/-1.02,-0.99,-1.00) accuracy:train/valid[723,1087,combined]=(0.710,0.721,0.718/0.69,0.70,0.700)
+
 # this is the standard "tdnn" system, built in nnet3; it's what we used to
 # call multi-splice.
 
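Each cell of the comparison table above is harvested from a decode directory named decode_<testset>_<lm> under the experiment directory; for instance, the dev(fglarge) row reads the best wer_* file under decode_dev_clean_fglarge. Assuming the librispeech s5 layout used throughout this commit, a single cell can be reproduced by hand with the same pipeline the comparison script uses:

  # reproduce the "WER on dev(fglarge)" cell for the tdnn_sp system
  grep WER exp/nnet3_cleaned/tdnn_sp/decode_dev_clean_fglarge/wer_* | utils/best_wer.sh
  # appending "| awk '{print $2}'" keeps just the 4.52 shown in the table
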
Lines changed: 177 additions & 0 deletions
@@ -0,0 +1,177 @@
+#!/bin/bash
+
+# 1c is as 1b, but uses more modern TDNN configuration.
+
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_sp exp/nnet3_cleaned/tdnn_1c_sp
+# System                       tdnn_sp tdnn_1c_sp
+# WER on dev(fglarge)             4.52      4.20
+# WER on dev(tglarge)             4.80      4.37
+# WER on dev(tgmed)               6.02      5.31
+# WER on dev(tgsmall)             6.80      5.86
+# WER on dev_other(fglarge)      12.54     12.55
+# WER on dev_other(tglarge)      13.16     13.00
+# WER on dev_other(tgmed)        15.51     14.98
+# WER on dev_other(tgsmall)      17.12     15.88
+# WER on test(fglarge)            5.00      4.91
+# WER on test(tglarge)            5.22      4.99
+# WER on test(tgmed)              6.40      5.93
+# WER on test(tgsmall)            7.14      6.49
+# WER on test_other(fglarge)     12.56     12.94
+# WER on test_other(tglarge)     13.04     13.38
+# WER on test_other(tgmed)       15.58     15.11
+# WER on test_other(tgsmall)     16.88     16.28
+# Final train prob              0.7180    0.8509
+# Final valid prob              0.7003    0.8157
+# Final train prob (logLL)     -0.9483   -0.4294
+# Final valid prob (logLL)     -0.9963   -0.5662
+# Num-parameters              19268504  18391704
+
+# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn_1c_sp
+# exp/nnet3_cleaned/tdnn_1c_sp: num-iters=1088 nj=3..16 num-params=18.4M dim=40+100->5784 combine=-0.43->-0.43 (over 4) loglike:train/valid[723,1087,combined]=(-0.48,-0.43,-0.43/-0.58,-0.57,-0.57) accuracy:train/valid[723,1087,combined]=(0.840,0.854,0.851/0.811,0.816,0.816)
+
+# this is the standard "tdnn" system, built in nnet3; it's what we used to
+# call multi-splice.
+
+# without cleanup:
+# local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" &
+
+
+# At this script level we don't support not running on GPU, as it would be painfully slow.
+# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
+# --num-threads 16 and --minibatch-size 128.
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0
+decode_nj=30
+train_set=train_960_cleaned
+gmm=tri6b_cleaned  # this is the source gmm-dir for the data-type of interest; it
+                   # should have alignments for the specified training data.
+nnet3_affix=_cleaned
+
+# Options which are not passed through to run_ivector_common.sh
+affix=
+train_stage=-10
+common_egs_dir=
+reporting_email=
+remove_egs=true
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+
+if ! cuda-compiled; then
+  cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+local/nnet3/run_ivector_common.sh --stage $stage \
+  --train-set $train_set \
+  --gmm $gmm \
+  --nnet3-affix "$nnet3_affix" || exit 1;
+
+
+gmm_dir=exp/${gmm}
+graph_dir=$gmm_dir/graph_tgsmall
+ali_dir=exp/${gmm}_ali_${train_set}_sp
+dir=exp/nnet3${nnet3_affix}/tdnn${affix:+_$affix}_sp
+train_data_dir=data/${train_set}_sp_hires
+train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires
+
+
+for f in $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \
+    $graph_dir/HCLG.fst $ali_dir/ali.1.gz $gmm_dir/final.mdl; do
+  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
+done
+
+if [ $stage -le 11 ]; then
+  echo "$0: creating neural net configs";
+
+  num_targets=$(tree-info $ali_dir/tree | grep num-pdfs | awk '{print $2}')
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
+  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
+  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
+  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf16 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  tdnnf-layer name=tdnnf17 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
+  linear-component name=prefinal-l dim=256 $linear_opts
+
+  prefinal-layer name=prefinal input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
+  output-layer name=output input=prefinal dim=$num_targets max-change=1.5
+EOF
+  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
+    --config-dir $dir/configs || exit 1;
+fi
+
+if [ $stage -le 12 ]; then
+  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+    utils/create_split_dir.pl \
+      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+  fi
+
+  steps/nnet3/train_dnn.py --stage=$train_stage \
+    --cmd="$decode_cmd" \
+    --feat.online-ivector-dir $train_ivector_dir \
+    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+    --trainer.num-epochs 4 \
+    --trainer.optimization.num-jobs-initial 3 \
+    --trainer.optimization.num-jobs-final 16 \
+    --trainer.optimization.initial-effective-lrate 0.0017 \
+    --trainer.optimization.final-effective-lrate 0.00017 \
+    --egs.dir "$common_egs_dir" \
+    --cleanup.remove-egs $remove_egs \
+    --cleanup.preserve-model-interval 100 \
+    --feat-dir=$train_data_dir \
+    --ali-dir $ali_dir \
+    --lang data/lang \
+    --reporting.email="$reporting_email" \
+    --dir=$dir || exit 1;
+
+fi
+
+if [ $stage -le 13 ]; then
+  # this does offline decoding that should give about the same results as the
+  # real online decoding (the one with --per-utt true)
+  rm $dir/.error 2>/dev/null || true
+  for test in test_clean test_other dev_clean dev_other; do
+    (
+      steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \
+        ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1
+      steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
+        data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1
+      steps/lmrescore_const_arpa.sh \
+        --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
+        data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1
+      steps/lmrescore_const_arpa.sh \
+        --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
+        data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+exit 0;
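
Two hedged observations on the xconfig above. First, $affine_opts, $tdnnf_opts, $linear_opts and $prefinal_opts are never assigned in the script as shown, so inside the unquoted heredoc they expand to empty strings and the layers fall back to their default options. Second, the network's approximate temporal context can be tallied from the time-stride values, assuming each tdnnf-layer with time-stride=s widens the context by roughly ±s frames and the lda layer's Append(-2,...,2) contributes ±2; this is a back-of-the-envelope estimate, not the compiled model's exact context:

  # rough context estimate for the network above
  strides="1 1 1 0 3 3 3 3 3 3 3 3 3 3 3 3"  # time-strides of tdnnf2..tdnnf17
  total=2                                    # +-2 from the lda splicing
  for s in $strides; do total=$((total + s)); done
  echo "approximate model context: +-$total frames"  # +-41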
