@@ -9,14 +9,12 @@ stage=0
99
1010# GPU device id to use (count from 0).
1111# you can also set `CUDA_VISIBLE_DEVICES` and set `device_id=0`
12- device_id=6
12+ device_id=3
1313
1414nj=10
1515
16- lang=data/lang_chain # output lang dir
17- ali_dir=exp/tri5a_ali # input alignment dir
18- lat_dir=exp/tri5a_lats # input lat dir
19- treedir=exp/chain/tri5_tree # output tree dir
16+ train_set=train_cleaned
17+ gmm_dir=exp/tri3_cleaned
2018
2119# You should know how to calculate your model's left/right context **manually**
2220model_left_context=28
@@ -47,66 +45,106 @@ save_nn_output_as_compressed=false
4745
4846. parse_options.sh
4947
48+ ali_dir=${gmm_dir}_ali_${train_set}_sp # output ali dir
49+ lat_dir=${gmm_dir}_lat_${train_set}_sp # output lat dir
50+ tree_dir=${gmm_dir}_tree_${train_set}_sp # output tree dir
51+ train_data_dir=data/${train_set}_sp_hires # high-resolution speed-perturbed training data
52+ lores_train_data_dir=data/${train_set}_sp # low-resolution speed-perturbed training data
53+
5054if [[ $stage -le 0 ]]; then
51- for datadir in train dev test ; do
52- dst_dir=data/mfcc_hires/$datadir
53- if [[ ! -f $dst_dir /feats.scp ]]; then
54- echo " making mfcc features for LF-MMI training"
55- utils/copy_data_dir.sh data/$datadir $dst_dir
56- steps/make_mfcc.sh \
57- --mfcc-config conf/mfcc_hires.conf \
58- --cmd " $train_cmd " \
59- --nj $nj \
60- $dst_dir || exit 1
61- steps/compute_cmvn_stats.sh $dst_dir || exit 1
62- utils/fix_data_dir.sh $dst_dir
63- else
64- echo " $dst_dir /feats.scp already exists."
65- echo " kaldi (local/run_tdnn_1b.sh) LF-MMI may have generated it."
66- echo " skip $dst_dir "
67- fi
55+ echo " $0 : preparing directory for low-resolution speed-perturbed data (for alignment)"
56+ utils/data/perturb_data_dir_speed_3way.sh data/$train_set data/${train_set} _sp
57+
58+ for x in ${train_set} _sp dev test ; do
59+ utils/copy_data_dir.sh data/$x data/${x} _hires
6860 done
6961fi
7062
7163if [[ $stage -le 1 ]]; then
64+ echo " $0 : making MFCC features for low-resolution speed-perturbed data"
65+ steps/make_mfcc.sh --nj $nj --cmd " $train_cmd " data/${train_set} _sp
66+ steps/compute_cmvn_stats.sh data/${train_set} _sp
67+ echo " fixing input data-dir to remove nonexistent features, in case some "
68+ echo " .. speed-perturbed segments were too short."
69+ utils/fix_data_dir.sh data/${train_set} _sp
70+ fi
71+
72+ if [[ $stage -le 2 ]]; then
73+ echo " $0 : aligning with the perturbed low-resolution data"
74+ steps/align_fmllr.sh --nj $nj --cmd " $train_cmd " \
75+ data/${train_set} _sp data/lang $gmm_dir $ali_dir
76+ fi
77+
78+ if [[ $stage -le 3 ]]; then
79+ echo " $0 : creating high-resolution MFCC features"
80+
81+ # do volume-perturbation on the training data prior to extracting hires
82+ # features; this helps make trained nnets more invariant to test data volume.
83+ utils/data/perturb_data_dir_volume.sh data/${train_set} _sp_hires
84+
85+ for x in ${train_set} _sp dev test ; do
86+ steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
87+ --cmd " $train_cmd " data/${x} _hires
88+ steps/compute_cmvn_stats.sh data/${x} _hires
89+ utils/fix_data_dir.sh data/${x} _hires
90+ done
91+ fi
92+
93+ if [[ $stage -le 4 ]]; then # sanity check: inputs used by the later stages must exist
94+ for f in $gmm_dir/final.mdl $train_data_dir/feats.scp \
95+ $lores_train_data_dir/feats.scp $ali_dir/ali.1.gz; do
96+ [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
97+ done
98+ fi
99+
100+ if [[ $stage -le 5 ]]; then
101+ echo " $0 : creating lang directory with one state per phone."
72102 # Create a version of the lang/ directory that has one state per phone in the
73103 # topo file. [note, it really has two states.. the first one is only repeated
74104 # once, the second one has zero or more repeats.]
75- rm -rf $lang
76- cp -r data/lang $lang
77- silphonelist=$( cat $lang /phones/silence.csl) || exit 1
78- nonsilphonelist=$( cat $lang /phones/nonsilence.csl) || exit 1
105+ rm -rf data/lang_chain && cp -r data/lang data/lang_chain # remove any stale copy first: cp -r into an existing dir would create data/lang_chain/lang instead
106+ silphonelist=$( cat data/lang_chain/phones/silence.csl) || exit 1;
107+ nonsilphonelist=$( cat data/lang_chain/phones/nonsilence.csl) || exit 1;
79108 # Use our special topology... note that later on may have to tune this
80109 # topology.
81- steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist > $lang /topo
110+ steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist > data/lang_chain /topo
82111fi
83112
84- if [[ $stage -le 2 ]]; then
85- # Build a tree using our new topology. This is the critically different
86- # step compared with other recipes.
113+ if [[ $stage -le 6 ]]; then
114+ # Get the alignments as lattices (gives the chain training more freedom).
115+ # use the same num-jobs as the alignments
116+ steps/align_fmllr_lats.sh --nj $nj --cmd " $train_cmd " ${lores_train_data_dir} \
117+ data/lang $gmm_dir $lat_dir
118+ rm $lat_dir /fsts.* .gz # save space
119+ fi
120+
121+ if [[ $stage -le 7 ]]; then
122+ # Build a tree using our new topology. We know we have alignments for the
123+ # speed-perturbed data (stage 2 of this script wrote them to $ali_dir), so use
124+ # those.
87125 steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
88126 --context-opts " --context-width=2 --central-position=1" \
89- --cmd " $train_cmd " 5000 data/mfcc/train $lang $ ali_dir $treedir
127+ --cmd " $train_cmd " 4000 ${lores_train_data_dir} data/lang_chain $ ali_dir $tree_dir
90128fi
91129
92- if [[ $stage -le 3 ]]; then
93- echo " creating phone language-model"
130+ if [[ $stage -le 8 ]]; then
131+ echo " $0 : creating phone language-model"
94132 " $train_cmd " exp/chain/log/make_phone_lm.log \
95133 chain-est-phone-lm \
96- " ark:gunzip -c $treedir /ali.*.gz | ali-to-phones $treedir /final.mdl ark:- ark:- |" \
134+ " ark:gunzip -c $tree_dir /ali.*.gz | ali-to-phones $tree_dir /final.mdl ark:- ark:- |" \
97135 exp/chain/phone_lm.fst || exit 1
98136fi
99137
100- if [[ $stage -le 4 ]]; then
138+ if [[ $stage -le 9 ]]; then
101139 echo " creating denominator FST"
102- copy-transition-model $treedir /final.mdl exp/chain/0.trans_mdl
103- cp $treedir /tree exp/chain
140+ copy-transition-model $tree_dir /final.mdl exp/chain/0.trans_mdl
141+ cp $tree_dir /tree exp/chain
104142 " $train_cmd " exp/chain/log/make_den_fst.log \
105143 chain-make-den-fst exp/chain/tree exp/chain/0.trans_mdl exp/chain/phone_lm.fst \
106144 exp/chain/den.fst exp/chain/normalization.fst || exit 1
107145fi
108146
109- if [[ $stage -le 5 ]]; then
147+ if [[ $stage -le 10 ]]; then
110148 echo " generating egs"
111149 steps/nnet3/chain/get_egs.sh \
112150 --alignment-subsampling-factor 3 \
@@ -125,14 +163,14 @@ if [[ $stage -le 5 ]]; then
125163 --right-tolerance 5 \
126164 --srand 0 \
127165 --stage -10 \
128- data/mfcc_hires/train \
166+ $train_data_dir \
129167 exp/chain $lat_dir exp/chain/egs
130168fi
131169
132170feat_dim=$( cat exp/chain/egs/info/feat_dim)
133171output_dim=$( cat exp/chain/egs/info/num_pdfs)
134172
135- if [[ $stage -le 6 ]]; then
173+ if [[ $stage -le 11 ]]; then
136174 echo " merging egs"
137175 mkdir -p exp/chain/merged_egs
138176 num_egs=$( ls -1 exp/chain/egs/cegs* .ark | wc -l)
@@ -145,15 +183,15 @@ if [[ $stage -le 6 ]]; then
145183 rm exp/chain/egs/cegs.* .ark
146184fi
147185
148- if [[ $stage -le 7 ]]; then
186+ if [[ $stage -le 12 ]]; then
149187 # Note: it might appear that this $lang directory is mismatched, and it is as
150188 # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
151189 # the lang directory.
152190 local/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/chain exp/chain/graph
153191fi
154192
155- if [[ $stage -le 8 ]]; then
156- echo " training..."
193+ if [[ $stage -le 13 ]]; then
194+ echo " $0 : training..."
157195
158196 mkdir -p exp/chain/train/tensorboard
159197 train_checkpoint=
@@ -187,22 +225,22 @@ if [[ $stage -le 8 ]]; then
187225 --train.xent-regularize 0.1
188226fi
189227
190- if [[ $stage -le 9 ]]; then
228+ if [[ $stage -le 14 ]]; then
191229 echo " inference: computing likelihood"
192230 for x in test dev; do
193- mkdir -p exp/chain/inference/$x
194- if [[ -f exp/chain/inference/$x /nnet_output.scp ]]; then
195- echo " exp/chain/inference/$x /nnet_output.scp already exists! Skip"
231+ mkdir -p exp/chain/inference/${x} _hires
232+ if [[ -f exp/chain/inference/${x} _hires /nnet_output.scp ]]; then
233+ echo " exp/chain/inference/${x} _hires /nnet_output.scp already exists! Skip"
196234 else
197235 best_epoch=$( cat exp/chain/train/best-epoch-info | grep ' best epoch' | awk ' {print $NF}' )
198236 inference_checkpoint=exp/chain/train/epoch-${best_epoch} .pt
199237 python3 ./chain/inference.py \
200238 --bottleneck-dim $bottleneck_dim \
201239 --checkpoint $inference_checkpoint \
202240 --device-id $device_id \
203- --dir exp/chain/inference/$x \
241+ --dir exp/chain/inference/${x} _hires \
204242 --feat-dim $feat_dim \
205- --feats-scp data/mfcc_hires/ $x /feats.scp \
243+ --feats-scp data/${x} _hires /feats.scp \
206244 --hidden-dim $hidden_dim \
207245 --is-training false \
208246 --kernel-size-list " $kernel_size_list " \
@@ -217,36 +255,36 @@ if [[ $stage -le 9 ]]; then
217255 done
218256fi
219257
220- if [[ $stage -le 10 ]]; then
258+ if [[ $stage -le 15 ]]; then
221259 echo " decoding"
222260 for x in test dev; do
223- if [[ ! -f exp/chain/inference/$x /nnet_output.scp ]]; then
224- echo " exp/chain/inference/$x /nnet_output.scp does not exist!"
261+ if [[ ! -f exp/chain/inference/${x} _hires /nnet_output.scp ]]; then
262+ echo " exp/chain/inference/${x} _hires /nnet_output.scp does not exist!"
225263 echo " Please run inference.py first"
226264 exit 1
227265 fi
228- echo " decoding $x "
266+ echo " decoding ${x} _hires "
229267
230268 ./local/decode.sh \
231269 --nj $nj \
232270 exp/chain/graph \
233271 exp/chain/0.trans_mdl \
234- exp/chain/inference/$x /nnet_output.scp \
235- exp/chain/decode_res/$x
272+ exp/chain/inference/${x} _hires /nnet_output.scp \
273+ exp/chain/decode_res/${x} _hires
236274 done
237275fi
238276
239- if [[ $stage -le 11 ]]; then
277+ if [[ $stage -le 16 ]]; then
240278 echo " scoring"
241279
242280 for x in test dev; do
243281 ./local/score.sh --cmd " $decode_cmd " \
244- data/mfcc_hires/ $x \
282+ data/${x} _hires \
245283 exp/chain/graph \
246- exp/chain/decode_res/$x || exit 1
284+ exp/chain/decode_res/${x} _hires || exit 1
247285 done
248286
249287 for x in test dev; do
250- head exp/chain/decode_res/$x /scoring_kaldi/best_*
288+ head exp/chain/decode_res/${x} _hires /scoring_kaldi/best_*
251289 done
252290fi
0 commit comments