# Feature extraction -> SAD -> Diarization -> ASR
#
# Copyright  2017  Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal)
#            2019  Desh Raj, David Snyder, Ashish Arora, Zhaoheng Ni
# Apache 2.0
99
# Begin configuration section.
# Number of parallel jobs handed to the feature extraction, SAD,
# diarization and decoding steps via --nj.
nj=8
stage=0
sad_stage=0
# When "true", the SAD output is scored against a reference RTTM.
score_sad=true
diarizer_stage=0
decode_diarize_stage=0
score_stage=0

enhancement=beamformit

# Option to use the new RTTM reference for SAD and diarization scoring.
use_new_rttm_reference=false
# NOTE(review): this check sees the default assigned just above. If
# command-line options are parsed later in the script (not visible here),
# an override given on the command line would arrive too late to trigger
# the download -- confirm, and move this block after option parsing if so.
if [ "$use_new_rttm_reference" = "true" ]; then
  # Compare as a string instead of executing the variable's value as a
  # command. Skip the clone on re-runs: cloning into an existing directory
  # fails, and later stages read ./chime6_rttm, so stop early on failure.
  if [ ! -d chime6_rttm ]; then
    git clone https://github.com/nateanl/chime6_rttm || exit 1
  fi
fi

# chime5 main directory path
# please change the path accordingly
chime5_corpus=/export/corpora4/CHiME5
@@ -93,14 +100,15 @@ if [ $stage -le 1 ]; then
93100 " $PWD /${enhandir} /${dset} _${enhancement} _u0*" \
94101 ${json_dir} /${dset} data/${dset} _${enhancement} _dereverb
95102 done
103+
96104fi
97105
98106if [ $stage -le 2 ]; then
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc
# ${test_sets} is left unquoted on purpose so the loop visits each
# whitespace-separated entry.
for x in ${test_sets}; do
  # Extract high-resolution MFCCs for this set using the shared job count.
  steps/make_mfcc.sh --nj "$nj" --cmd "$train_cmd" \
    --mfcc-config conf/mfcc_hires.conf \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir"
done
@@ -121,18 +129,44 @@ if [ $stage -le 3 ]; then
121129 exit 0
122130 fi
# Perform segmentation
local/segmentation/detect_speech_activity.sh --nj "$nj" --stage "$sad_stage" \
  "$test_set" "$sad_nnet_dir" mfcc "$sad_work_dir" \
  "data/${datadir}" || exit 1

# Directory that holds the SAD-segmented copy of this data set.
test_dir=data/${datadir}_${nnet_type}_seg
# NOTE(review): if ${test_dir} already exists from an earlier run, this mv
# nests the new directory inside it instead of replacing it -- confirm that
# re-runs start from a clean data/ tree.
mv "data/${datadir}_seg" "${test_dir}/"
# Copy (not move) the backups so the source data directory keeps them.
cp "data/${datadir}"/{segments.bak,utt2spk.bak} "${test_dir}/"
# Generate RTTM file from segmentation performed by SAD. This can
# be used to evaluate the performance of the SAD as an intermediate
# step.
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
  "${test_dir}/utt2spk" "${test_dir}/segments" "${test_dir}/rttm"

if [ "$score_sad" = "true" ]; then
  echo "Scoring $datadir.."
  # We first generate the reference RTTM from the backed up utt2spk and segments
  # files.
  ref_rttm=${test_dir}/ref_rttm
  steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
    "${test_dir}/utt2spk.bak" "${test_dir}/segments.bak" "$ref_rttm"

  # To score, we select just U06 segments from the hypothesis RTTM.
  hyp_rttm=${test_dir}/rttm.U06
  grep 'U06' "${test_dir}/rttm" > "$hyp_rttm"
  echo "Array U06 selected for scoring.."

  # String comparison; the flag's value is never executed as a command.
  if [ "$use_new_rttm_reference" = "true" ]; then
    echo "Use the new RTTM reference."
    # Text of $datadir before its first underscore selects the RTTM file.
    mode=${datadir%%_*}
    ref_rttm=./chime6_rttm/${mode}_rttm
  fi

  # Strip the array/enhancement tag from both RTTMs before scoring them
  # against each other.
  sed 's/_U0[1-6].ENH//g' "$ref_rttm" > "$ref_rttm.scoring"
  sed 's/_U0[1-6].ENH//g' "$hyp_rttm" > "$hyp_rttm.scoring"
  grep 'U06' ./local/uem_file | sed 's/_U0[1-6]//g' > ./local/uem_file.tmp
  # Print only the missed-speech and false-alarm lines of the report.
  # A plain `||` of two patterns works in every awk, unlike gawk's or().
  md-eval.pl -1 -c 0.25 -u ./local/uem_file.tmp \
    -r "$ref_rttm.scoring" -s "$hyp_rttm.scoring" |
    awk '/MISSED SPEECH/ || /FALARM SPEECH/'
fi
136170 done
137171fi
138172
141175# ######################################################################
142176if [ $stage -le 4 ]; then
143177 for datadir in ${test_sets} ; do
# Choose the reference RTTM handed to local/diarize.sh through --ref-rttm:
# the file under ./chime6_rttm when the new reference is requested,
# otherwise the ref_rttm inside this set's SAD-segmented data directory.
if [ "$use_new_rttm_reference" = "true" ]; then
  # Text of $datadir before its first underscore selects the RTTM file.
  mode=${datadir%%_*}
  ref_rttm=./chime6_rttm/${mode}_rttm
else
  ref_rttm=data/${datadir}_${nnet_type}_seg/ref_rttm
fi
local/diarize.sh --nj "$nj" --cmd "$train_cmd" --stage "$diarizer_stage" \
  --ref-rttm "$ref_rttm" \
  exp/xvector_nnet_1a \
  "data/${datadir}_${nnet_type}_seg" \
  "exp/${datadir}_${nnet_type}_seg_diarization"
@@ -156,7 +197,7 @@ if [ $stage -le 5 ]; then
# Decode using the diarization output for this set; abort the whole
# script if decoding fails.
local/decode_diarized.sh --nj "$nj" --cmd "$decode_cmd" --stage "$decode_diarize_stage" \
  "exp/${datadir}_${nnet_type}_seg_diarization" "data/$datadir" data/lang \
  "exp/chain_${train_set}_cleaned_rvb" "exp/nnet3_${train_set}_cleaned_rvb" \
  "data/${datadir}_diarized" || exit 1
160201 done
161202fi
162203
0 commit comments