Skip to content

Commit fb514dc

Browse files
jtrmal authored and danpovey committed
[egs] BABEL script fix: avoid make_L_align.sh generating invalid files (kaldi-asr#3022)
1 parent 41ea8cf commit fb514dc

File tree

3 files changed

+13
-6
lines changed

3 files changed

+13
-6
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ GSYMS
8383
/tools/ATLAS/
8484
/tools/atlas3.8.3.tar.gz
8585
/tools/irstlm/
86+
/tools/mitlm/
8687
/tools/openfst
8788
/tools/openfst-1.3.2.tar.gz
8889
/tools/openfst-1.3.2/

egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ unsup_data_list=./conf/lists/404-georgian/untranscribed-training.list
7575
unsup_nj=32
7676

7777

78-
lexicon_file=
79-
lexiconFlags="--romanized --oov <unk>"
78+
lexicon_file=/export/corpora/LDC/LDC2016S12/IARPA_BABEL_OP3_404/conversational/reference_materials/lexicon.txt
79+
lexiconFlags=" --romanized --oov <unk>"
8080

8181

8282

egs/babel/s5d/local/make_L_align.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,24 @@ tmpdir=$1
3434
dir=$2
3535
outdir=$3
3636

37+
for f in $dir/phones/optional_silence.txt $dir/phones.txt $dir/words.txt ; do
38+
[ -f "$f" ] || { echo "$0: The file $f must exist!" >&2; exit 1; }
39+
done
40+
3741
silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
3842

43+
if [ ! -f $tmpdir/lexicon.txt ] && [ ! -f $tmpdir/lexiconp.txt ] ; then
44+
echo "$0: At least one of the files $tmpdir/lexicon.txt or $tmpdir/lexiconp.txt must exist" >&2
45+
exit 1
46+
fi
47+
3948
# Create lexicon with alignment info
4049
if [ -f $tmpdir/lexicon.txt ] ; then
4150
cat $tmpdir/lexicon.txt | \
4251
awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
43-
elif [ -f $tmpdir/lexiconp.txt ] ; then
52+
else
4453
cat $tmpdir/lexiconp.txt | \
4554
awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
46-
else
47-
echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt does not exist"
48-
exit 1
4955
fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
5056
fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
5157
--keep_isymbols=false --keep_osymbols=false | \

0 commit comments

Comments
 (0)