Skip to content

Commit 44c8805

Browse files
authored
Merge pull request #3996 from naxingyu/sync-pybind11-with-master
Sync pybind11 with master
2 parents 1ffb881 + f1f60f8 commit 44c8805

File tree

19 files changed

+196
-60
lines changed

19 files changed

+196
-60
lines changed

egs/chime6/s5_track2/RESULTS

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,23 @@
11
# Results for Chime-6 track 2 for dev and eval, using pretrained models
22
# available at http://kaldi-asr.org/models/m12.
33

4-
# Speech Activity Detection (SAD)
5-
Missed speech False alarm Total error
6-
Dev 4.3 2.1 6.4
7-
Eval 5.6 5.9 11.5
4+
# These results are reported only for array U06, which is the array
5+
# selected by default in the baseline system.
86

9-
# The results for the remaining pipeline are only for array U06.
7+
# Speech Activity Detection (SAD)
8+
Missed speech False alarm Total error
9+
Dev (old RTTM) 2.5 0.8 3.3
10+
Dev (new RTTM) 1.9 0.7 2.6
11+
Eval (old RTTM) 4.1 1.8 5.9
12+
Eval (new RTTM) 4.3 1.5 5.8
1013

1114
# Diarization
12-
DER JER
13-
Dev 57.15 83.96
14-
Eval 54.12 80.33
15+
DER JER
16+
Dev (old RTTM) 61.56 69.75
17+
Dev (new RTTM) 63.42 70.83
18+
Eval (old RTTM) 61.96 71.40
19+
Eval (new RTTM) 68.20 72.54
1520

1621
# ASR nnet3 tdnn+chain
17-
Dev: %WER 84.33 [ 49653 / 58881, 1529 ins, 35813 del, 12311 sub ]
18-
Eval: %WER 78.08 [ 43046 / 55132, 957 ins, 32045 del, 10044 sub ]
22+
Dev: %WER 84.25 [ 49610 / 58881, 1937 ins, 34685 del, 12988 sub ]
23+
Eval: %WER 77.94 [ 42971 / 55132, 1086 ins, 30839 del, 11046 sub ]

egs/chime6/s5_track2/local/decode.sh

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,26 @@
44
# Feature extraction -> SAD -> Diarization -> ASR
55
#
66
# Copyright 2017 Johns Hopkins University (Author: Shinji Watanabe and Yenda Trmal)
7-
# 2019 Desh Raj, David Snyder, Ashish Arora
7+
# 2019 Desh Raj, David Snyder, Ashish Arora, Zhaoheng Ni
88
# Apache 2.0
99

1010
# Begin configuration section.
1111
nj=8
12-
decode_nj=10
1312
stage=0
1413
sad_stage=0
14+
score_sad=true
1515
diarizer_stage=0
1616
decode_diarize_stage=0
1717
score_stage=0
18+
1819
enhancement=beamformit
1920

21+
# Option to use the new RTTM reference for SAD and diarization scoring
22+
use_new_rttm_reference=false
23+
if $use_new_rttm_reference == "true"; then
24+
git clone https://github.com/nateanl/chime6_rttm
25+
fi
26+
2027
# chime5 main directory path
2128
# please change the path accordingly
2229
chime5_corpus=/export/corpora4/CHiME5
@@ -93,14 +100,15 @@ if [ $stage -le 1 ]; then
93100
"$PWD/${enhandir}/${dset}_${enhancement}_u0*" \
94101
${json_dir}/${dset} data/${dset}_${enhancement}_dereverb
95102
done
103+
96104
fi
97105

98106
if [ $stage -le 2 ]; then
99107
# mfccdir should be some place with a largish disk where you
100108
# want to store MFCC features.
101109
mfccdir=mfcc
102110
for x in ${test_sets}; do
103-
steps/make_mfcc.sh --nj $decode_nj --cmd "$train_cmd" \
111+
steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
104112
--mfcc-config conf/mfcc_hires.conf \
105113
data/$x exp/make_mfcc/$x $mfccdir
106114
done
@@ -121,18 +129,44 @@ if [ $stage -le 3 ]; then
121129
exit 0
122130
fi
123131
# Perform segmentation
124-
local/segmentation/detect_speech_activity.sh --nj $decode_nj --stage $sad_stage \
132+
local/segmentation/detect_speech_activity.sh --nj $nj --stage $sad_stage \
125133
$test_set $sad_nnet_dir mfcc $sad_work_dir \
126134
data/${datadir} || exit 1
127135

128-
mv data/${datadir}_seg data/${datadir}_${nnet_type}_seg
129-
mv data/${datadir}/{segments.bak,utt2spk.bak} data/${datadir}_${nnet_type}_seg
136+
test_dir=data/${datadir}_${nnet_type}_seg
137+
mv data/${datadir}_seg ${test_dir}/
138+
cp data/${datadir}/{segments.bak,utt2spk.bak} ${test_dir}/
130139
# Generate RTTM file from segmentation performed by SAD. This can
131140
# be used to evaluate the performance of the SAD as an intermediate
132141
# step.
133142
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \
134-
data/${datadir}_${nnet_type}_seg/utt2spk data/${datadir}_${nnet_type}_seg/segments \
135-
data/${datadir}_${nnet_type}_seg/rttm
143+
${test_dir}/utt2spk ${test_dir}/segments ${test_dir}/rttm
144+
145+
if [ $score_sad == "true" ]; then
146+
echo "Scoring $datadir.."
147+
# We first generate the reference RTTM from the backed up utt2spk and segments
148+
# files.
149+
ref_rttm=${test_dir}/ref_rttm
150+
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py ${test_dir}/utt2spk.bak \
151+
${test_dir}/segments.bak ${test_dir}/ref_rttm
152+
153+
# To score, we select just U06 segments from the hypothesis RTTM.
154+
hyp_rttm=${test_dir}/rttm.U06
155+
grep 'U06' ${test_dir}/rttm > ${test_dir}/rttm.U06
156+
echo "Array U06 selected for scoring.."
157+
158+
if $use_new_rttm_reference == "true"; then
159+
echo "Use the new RTTM reference."
160+
mode="$(cut -d'_' -f1 <<<"$datadir")"
161+
ref_rttm=./chime6_rttm/${mode}_rttm
162+
fi
163+
164+
sed 's/_U0[1-6].ENH//g' $ref_rttm > $ref_rttm.scoring
165+
sed 's/_U0[1-6].ENH//g' $hyp_rttm > $hyp_rttm.scoring
166+
cat ./local/uem_file | grep 'U06' | sed 's/_U0[1-6]//g' > ./local/uem_file.tmp
167+
md-eval.pl -1 -c 0.25 -u ./local/uem_file.tmp -r $ref_rttm.scoring -s $hyp_rttm.scoring |\
168+
awk 'or(/MISSED SPEECH/,/FALARM SPEECH/)'
169+
fi
136170
done
137171
fi
138172

@@ -141,7 +175,14 @@ fi
141175
#######################################################################
142176
if [ $stage -le 4 ]; then
143177
for datadir in ${test_sets}; do
144-
local/diarize.sh --nj 10 --cmd "$train_cmd" --stage $diarizer_stage \
178+
if $use_new_rttm_reference == "true"; then
179+
mode="$(cut -d'_' -f1 <<<"$datadir")"
180+
ref_rttm=./chime6_rttm/${mode}_rttm
181+
else
182+
ref_rttm=data/${datadir}_${nnet_type}_seg/ref_rttm
183+
fi
184+
local/diarize.sh --nj $nj --cmd "$train_cmd" --stage $diarizer_stage \
185+
--ref-rttm $ref_rttm \
145186
exp/xvector_nnet_1a \
146187
data/${datadir}_${nnet_type}_seg \
147188
exp/${datadir}_${nnet_type}_seg_diarization
@@ -156,7 +197,7 @@ if [ $stage -le 5 ]; then
156197
local/decode_diarized.sh --nj $nj --cmd "$decode_cmd" --stage $decode_diarize_stage \
157198
exp/${datadir}_${nnet_type}_seg_diarization data/$datadir data/lang \
158199
exp/chain_${train_set}_cleaned_rvb exp/nnet3_${train_set}_cleaned_rvb \
159-
data/${datadir}_diarized
200+
data/${datadir}_diarized || exit 1
160201
done
161202
fi
162203

egs/chime6/s5_track2/local/decode_diarized.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,18 @@ if [ $stage -le 0 ]; then
3838
echo "$0 copying data files in output directory"
3939
cp $rttm_dir/rttm $rttm_dir/rttm_1
4040
sed -i 's/'.ENH'/''/g' $rttm_dir/rttm_1
41+
# Remove the participant introductions from the hypothesis RTTM, using the
42+
# UEM file, which contains the scoring durations for each recording.
43+
local/truncate_rttm.py $rttm_dir/rttm_1 local/uem_file $rttm_dir/rttm_introduction_removed
4144
mkdir -p ${out_dir}_hires
4245
cp ${data_in}/{wav.scp,utt2spk} ${out_dir}_hires
4346
utils/data/get_reco2dur.sh ${out_dir}_hires
4447
fi
4548

4649
if [ $stage -le 1 ]; then
4750
echo "$0 creating segments file from rttm and utt2spk, reco2file_and_channel "
48-
local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_1 \
49-
<(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_1 |sort -u) \
51+
local/convert_rttm_to_utt2spk_and_segments.py --append-reco-id-to-spkr=true $rttm_dir/rttm_introduction_removed \
52+
<(awk '{print $2".ENH "$2" "$3}' $rttm_dir/rttm_introduction_removed |sort -u) \
5053
${out_dir}_hires/utt2spk ${out_dir}_hires/segments
5154

5255
utils/utt2spk_to_spk2utt.pl ${out_dir}_hires/utt2spk > ${out_dir}_hires/spk2utt

egs/chime6/s5_track2/local/diarize.sh

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
#!/usr/bin/env bash
2-
# Copyright 2019 David Snder
1+
#!/bin/bash
2+
# Copyright 2019 David Snyder
3+
# 2020 Desh Raj
4+
35
# Apache 2.0.
46
#
57
# This script takes an input directory that has a segments file (and
@@ -20,7 +22,7 @@ if [ $# != 3 ]; then
2022
echo "Options: "
2123
echo " --nj <nj> # number of parallel jobs."
2224
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
23-
echo " --ref-rttm <path to reference RTTM> # if present, used to score output RTTM."
25+
echo " --ref_rttm ./local/dev_rttm # the location of the reference RTTM file"
2426
exit 1;
2527
fi
2628

@@ -85,29 +87,33 @@ if [ $stage -le 4 ]; then
8587
echo "$0: wrote RTTM to output directory ${out_dir}"
8688
fi
8789

90+
hyp_rttm=${out_dir}/rttm
91+
8892
# For scoring the diarization system, we use the same tool that was
8993
# used in the DIHARD II challenge. This is available at:
9094
# https://github.com/nryant/dscore
95+
# Note that the scoring takes a single reference RTTM and a single
96+
# hypothesis RTTM.
9197
if [ $stage -le 5 ]; then
9298
# If a reference RTTM file is not provided, we create one using the backed up
9399
# segments and utt2spk files in the original data directory.
94-
if [ -z $ref_rttm ]; then
95-
ref_rttm=data/$name/rttm
96-
echo "$0: preparing ref RTTM file from segments and utt2spk"
100+
if [ -z "$ref_rttm" ]; then
97101
steps/segmentation/convert_utt2spk_and_segments_to_rttm.py data/$name/utt2spk.bak \
98-
data/$name/segments.bak $ref_rttm
102+
data/$name/segments.bak data/$name/rttm
103+
ref_rttm=data/$name/rttm
99104
fi
100-
grep 'U06' $ref_rttm > ${ref_rttm}.U06
101-
ref_rttm_path=$(readlink -f ${ref_rttm}.U06)
102-
out_rttm_path=$(readlink -f $out_dir/rttm)
105+
echo "Diarization results for "${name}
103106
if ! [ -d dscore ]; then
104107
git clone https://github.com/nryant/dscore.git || exit 1;
105108
cd dscore
106109
python -m pip install --user -r requirements.txt
107110
cd ..
108111
fi
109-
cd dscore
110-
python score.py -r $ref_rttm_path -s $out_rttm_path
111-
cd ..
112+
sed 's/_U0[1-6]\.ENH//g' $ref_rttm > $ref_rttm.scoring
113+
sed 's/_U0[1-6]\.ENH//g' $hyp_rttm > $hyp_rttm.scoring
114+
ref_rttm_path=$(readlink -f ${ref_rttm}.scoring)
115+
hyp_rttm_path=$(readlink -f ${hyp_rttm}.scoring)
116+
cat ./local/uem_file | grep 'U06' | sed 's/_U0[1-6]//g' > ./local/uem_file.scoring
117+
cd dscore && python score.py -u ../local/uem_file.scoring -r $ref_rttm_path \
118+
-s $hyp_rttm_path && cd .. || exit 1;
112119
fi
113-
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/usr/bin/env bash
2+
3+
# Installs dscore
4+
git clone https://github.com/nryant/dscore.git
5+
pip3 install intervaltree --user
6+
pip3 install tabulate --user
7+
pip3 install munkres --user
8+
pip3 install pytest --user
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/env python3
2+
# Apache 2.0
3+
# This script truncates the RTTM file
4+
# using a UEM file and writes the result to a new RTTM file
5+
#
6+
from __future__ import print_function
7+
from __future__ import unicode_literals
8+
9+
import argparse
10+
from scorelib.turn import trim_turns
11+
import scorelib.rttm as rttm_func
12+
from scorelib.uem import load_uem
13+
14+
def get_args():
15+
parser = argparse.ArgumentParser(
16+
description="""This script truncates the rttm file
17+
using UEM file""")
18+
parser.add_argument("rttm_file", type=str,
19+
help="""Input RTTM file.
20+
The format of the RTTM file is
21+
<type> <file-id> <channel-id> <begin-time> """
22+
"""<end-time> <NA> <NA> <speaker> <conf>""")
23+
parser.add_argument("uem_file", type=str,
24+
help="""Input UEM file.
25+
The format of the UEM file is
26+
<file-id> <channel-id> <begin-time> <end-time>""")
27+
parser.add_argument("rttm_file_write", type=str,
28+
help="""output RTTM file.""")
29+
args = parser.parse_args()
30+
return args
31+
32+
33+
if __name__ == '__main__':
34+
args = get_args()
35+
rttm_writer = open(args.rttm_file_write, 'w')
36+
turns, speaker_ids, file_ids = rttm_func.load_rttm(args.rttm_file)
37+
loaded_uem = load_uem(args.uem_file)
38+
truncated_turns = trim_turns(turns, loaded_uem)
39+
rttm_func.write_rttm(args.rttm_file_write,truncated_turns)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
S01_U01 1 0 12000
2+
S02_U01 1 75 12000
3+
S09_U01 1 64 12000
4+
S21_U01 1 59 12000
5+
S01_U02 1 0 12000
6+
S02_U02 1 75 12000
7+
S09_U02 1 64 12000
8+
S21_U02 1 59 12000
9+
S01_U03 1 0 12000
10+
S02_U03 1 75 12000
11+
S09_U03 1 64 12000
12+
S21_U03 1 59 12000
13+
S01_U04 1 0 12000
14+
S02_U04 1 75 12000
15+
S09_U04 1 64 12000
16+
S21_U04 1 59 12000
17+
S01_U06 1 0 12000
18+
S02_U06 1 75 12000
19+
S09_U06 1 64 12000
20+
S21_U06 1 59 12000

egs/chime6/s5_track2/path.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
export KALDI_ROOT=`pwd`/../../..
22
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
33
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH
4+
export PATH=$PWD/dscore:$PATH
5+
export PYTHONPATH="${PYTHONPATH}:$PWD/dscore"
46
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
57
. $KALDI_ROOT/tools/config/common_path.sh
68
export LC_ALL=C

egs/chime6/s5_track2/run.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ stage=0
1616
nnet_stage=-10
1717
sad_stage=0
1818
diarizer_stage=0
19-
decode_stage=1
19+
decode_stage=0
2020
enhancement=beamformit # for a new enhancement method,
2121
# change this variable and decode stage
2222
decode_only=false
@@ -111,8 +111,12 @@ if [ $stage -le 4 ]; then
111111
utils/copy_data_dir.sh data/train_worn data/train_worn_org # back up
112112
grep -v -e "^P11_S03" -e "^P52_S19" -e "^P53_S24" -e "^P54_S24" data/train_worn_org/text > data/train_worn/text
113113
utils/fix_data_dir.sh data/train_worn
114-
fi
115114

115+
# Remove S12_U05 from training data since it has known issues
116+
utils/copy_data_dir.sh data/train_u05 data/train_u05_org # back up
117+
grep -v -e "^S12_U05" data/train_u05_org/text > data/train_u05/text
118+
utils/fix_data_dir.sh data/train_u05
119+
fi
116120

117121
#########################################################################################
118122
# In stages 5 and 6, we augment and fix train data for our training purpose. point source

egs/wsj/s5/local/nnet3/run_ivector_common.sh

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,12 @@ fi
7777

7878

7979
# high-resolution features and i-vector extractor,
80-
if [ $stage -le 5 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then
80+
if [ $stage -le 4 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then
8181
echo "$0: data/${train_set}_sp_hires/feats.scp already exists."
8282
echo " ... Please either remove it, or rerun this script with stage > 2."
8383
exit 1
8484
fi
8585

86-
if [ $stage -le 4 ]; then
87-
echo "$0: preparing directory for speed-perturbed data"
88-
utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
89-
fi
90-
9186
if [ $stage -le 5 ]; then
9287
echo "$0: creating high-resolution MFCC features"
9388

0 commit comments

Comments
 (0)