@@ -14,7 +14,7 @@ set -e -o pipefail
 
 remove_egs=false
 cmd=queue.pl
-srand=0
+srand=-1
 stage=0
 train_stage=-10
 get_egs_stage=-10
@@ -37,8 +37,9 @@ tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the
 tdnn_affix= #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
 feat_suffix=_hires
 
+label_delay=5
 frame_subsampling_factor=3
-xent_regularize=0.1
+xent_regularize=0.025
 max_param_change=2.0
 num_jobs_initial=2
 num_jobs_final=12
@@ -47,6 +48,8 @@ final_effective_lrate=0.0001
 num_jobs_initial=2
 num_jobs_final=8
 chunk_width=150
+extra_left_context=50
+extra_right_context=0
 common_egs_dir= # you can set this to use previously dumped egs.
 langconf=local.conf
 
@@ -87,7 +90,7 @@
 
 if [ "$speed_perturb" == "true" ]; then suffix=_sp; fi
 dir=${dir}${suffix}
-dir=exp/chain2_cleaned/tdnn_multi_sp_v7/
+dir=exp/chain2_cleaned/tdnn_multi_sp_v12/
 
 ivec_feat_suffix=${feat_suffix}
 if $use_pitch; then feat_suffix=${feat_suffix}_pitch ; fi
@@ -263,8 +266,8 @@ if [ $stage -le 11 ]; then
   dummy_tree_dir=${multi_ali_treedirs[0]}
   num_targets=`tree-info $dummy_tree_dir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1;
   cat <<EOF > $dir/configs/network.xconfig
-  $ivector_node_xconfig
   input dim=$feat_dim name=input
+  $ivector_node_xconfig
 
   # please note that it is important to have input layer with the name=input
   # as the layer immediately preceding the fixed-affine-layer to enable
@@ -276,23 +279,24 @@ if [ $stage -le 11 ]; then
   relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=450
   relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=450
   relu-batchnorm-layer name=tdnn7 input=Append(-6,-3,0) dim=450
-  relu-batchnorm-layer name=tdnn_bn dim=$bnf_dim
+  # relu-batchnorm-layer name=tdnn_bn dim=$bnf_dim
   # adding the layers for diffrent language's output
   # dummy output node
-  output-layer name=output dim=$num_targets max-change=1.5
-  output-layer name=output-xent input=tdnn_bn dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+  output-layer name=output dim=$num_targets max-change=1.5 output-delay=5
+  output-layer name=output-xent input=tdnn7 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
 EOF
   # added separate outptut layer and softmax for all languages.
   for lang_index in `seq 0 $[$num_langs-1]`;do
     tree_dir=${multi_ali_treedirs[$lang_index]}
     num_targets=`tree-info $tree_dir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1;
 
     lang_name=${lang_list[${lang_index}]}
-    echo "relu-renorm-layer name=prefinal-affine-lang-${lang_name} input=tdnn_bn dim=450 target-rms=0.5"
-    echo "output-layer name=output-${lang_name} dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5"
-    echo "output-layer name=output-${lang_name}-xent input=prefinal-affine-lang-${lang_name} dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5"
+    # echo "relu-renorm-layer name=prefinal-affine-lang-${lang_name} input=tdnn7 dim=450 target-rms=0.5"
+    echo "output-layer name=output-${lang_name} dim=$num_targets output-delay=5 input=tdnn7 max-change=1.5 include-log-softmax=false "
+    echo "output-layer name=output-${lang_name}-xent input=tdnn7 output-delay=5 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5"
   done >> $dir/configs/network.xconfig
 
+  lang_name=${lang_list[0]}
   steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
     --config-dir $dir/configs/
 fi
@@ -324,8 +328,8 @@
 if [ -z $model_right_context ]; then
   echo "ERROR: Cannot find entry for model_right_context in $dir/init/info.txt"
 fi
-egs_left_context=$[model_left_context+(frame_subsampling_factor/2)+egs_extra_left_context]
-egs_right_context=$[model_right_context+(frame_subsampling_factor/2)+egs_extra_right_context]
+egs_left_context=$[model_left_context+(frame_subsampling_factor/2)+extra_left_context]
+egs_right_context=$[model_right_context+(frame_subsampling_factor/2)+extra_right_context]
 
 if [ $stage -le 13 ]; then
   for lang_index in `seq 0 $[$num_langs-1]`;do
@@ -405,27 +409,23 @@
 
 if [ $stage -le 17 ]; then
   echo "$0: Preparing initial acoustic model"
-  if [ -f $dir/configs/init.config ]; then
-    $cuda_cmd ${dir}/log/add_first_layer.log \
-      nnet3-init --srand=${srand} ${dir}/configs/init.raw \
-      ${dir}/configs/final.config ${dir}/init/default.raw || exit 1
-  else
-    $cuda_cmd ${dir}/log/init_model.log \
-      nnet3-init --srand=${srand} ${dir}/configs/final.config ${dir}/init/multi.raw || exit 1
-  fi
+  $cuda_cmd ${dir}/log/init_model.log \
+    nnet3-init --srand=${srand} ${dir}/configs/final.config ${dir}/init/multi.raw || exit 1
 fi
 
 if [ $stage -le 18 ]; then
   echo "$0: Starting model training"
   steps/chain2/train.sh \
     --stage $train_stage --cmd "$cuda_cmd" \
     --multilingual-eg true \
-    --xent-regularize $xent_regularize --leaky-hmm-coefficient 0.1 \
+    --xent-regularize $xent_regularize --leaky-hmm-coefficient 0.25 --out-of-range-regularize 0.0 \
     --initial-effective-lrate $initial_effective_lrate \
     --final-effective-lrate $final_effective_lrate \
     --max-param-change $max_param_change \
     --groups-per-minibatch 128 \
-    --l2-regularize 10e-8 \
+    --srand 1 \
+    --shuffle-buffer-size 5000 \
+    --l2-regularize 5e-5 \
     --num-jobs-initial $num_jobs_initial --num-jobs-final $num_jobs_final \
     $common_egs_dir $dir
 fi