Commit 0e031c2

[src,scripts,egs] Add lookahead graph decoding == dynamic graph composition (#3616)
1 parent 7a50987 commit 0e031c2
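In practical terms, the change keeps the decoding graph in two parts, HCLr.fst and the grammar Gr.fst, which the new nnet3-latgen-faster-lookahead binary composes on the fly instead of requiring one statically composed HCLG.fst. A minimal sketch of the intended workflow, with paths borrowed from the librispeech example script added in this commit (the full examples below also pass --online-ivector-dir):

# Build a two-part lookahead graph instead of a single static HCLG.fst.
utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
    data/lang_test_tgmed_base exp/chain_cleaned/tdnn_1d_sp \
    exp/chain_cleaned/tdnn_1d_sp/graph_tgmed_lookahead

# Decode with runtime (dynamic) composition of HCLr.fst and Gr.fst.
steps/nnet3/decode_lookahead.sh --nj 20 --acwt 1.0 --post-decode-acwt 10.0 \
    exp/chain_cleaned/tdnn_1d_sp/graph_tgmed_lookahead data/test_clean_hires \
    exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead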

File tree

14 files changed: +860 -43 lines changed


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -151,6 +151,8 @@ GSYMS
 /tools/cub-1.8.0/
 /tools/cub
 /tools/python/
+/tools/ngram-1.3.7.tar.gz
+/tools/ngram-1.3.7/

 # These CMakeLists.txt files are all generated on the fly at the moment.
 # They are added here to avoid accidental check-in.
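The two new ignore entries match the OpenGrm NGram sources (ngram-1.3.7) that the example scripts below expect under tools/; the scripts point to extras/install_opengrm.sh when the tools are missing. A one-time setup sketch, assuming a standard Kaldi checkout with KALDI_ROOT set:

# Install the OpenGrm NGram tools used to convert ARPA LMs for lookahead decoding
# (this is what leaves ngram-1.3.7.tar.gz and ngram-1.3.7/ under tools/).
cd "$KALDI_ROOT/tools"
extras/install_opengrm.sh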
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
#!/bin/bash

. ./path.sh

# Example script for lookahead composition

lm=tgmed
am=exp/chain_cleaned/tdnn_1d_sp
testset=test_clean

# %WER 4.86 [ 2553 / 52576, 315 ins, 222 del, 2016 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead/wer_11_0.0
# %WER 4.79 [ 2518 / 52576, 279 ins, 292 del, 1947 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_arpa/wer_11_0.0
# %WER 4.82 [ 2532 / 52576, 286 ins, 290 del, 1956 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_arpa_fast/wer_11_0.0
# %WER 4.86 [ 2553 / 52576, 314 ins, 222 del, 2017 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_base/wer_11_0.0
# %WER 4.86 [ 2553 / 52576, 315 ins, 222 del, 2016 sub ] exp/chain_cleaned/tdnn_1d_sp/decode_test_clean_lookahead_static/wer_11_0.0

# Speed
#
# base       0.18 xRT
# static     0.18 xRT
# lookahead  0.29 xRT
# arpa       0.35 xRT
# arpa_fast  0.21 xRT

# Graph size
#
# Base                 476 Mb
# Static               621 Mb
# Lookahead            48 Mb HCL + 77 Mb Grammar
# Lookahead + OpenGrm  48 Mb HCL + 42 Mb Grammar

if [ ! -f "${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so" ]; then
    echo "Missing ${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so"
    echo "Make sure you compiled openfst with lookahead support. Run make in ${KALDI_ROOT}/tools after git pull."
    exit 1
fi
if [ ! -f "${KALDI_ROOT}/tools/openfst/bin/ngramread" ]; then
    echo "You appear to not have OpenGRM tools installed. Missing ${KALDI_ROOT}/tools/openfst/bin/ngramread"
    echo "cd to $KALDI_ROOT/tools and run extras/install_opengrm.sh."
    exit 1
fi
export LD_LIBRARY_PATH=${KALDI_ROOT}/tools/openfst/lib/fst

# Baseline
utils/format_lm.sh data/lang data/local/lm/lm_${lm}.arpa.gz \
    data/local/dict/lexicon.txt data/lang_test_${lm}_base

utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov \
    data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead_base

steps/nnet3/decode.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead_base data/${testset}_hires ${am}/decode_${testset}_lookahead_base

utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --remove-oov --compose-graph \
    data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead

# Decode with statically composed lookahead graph
steps/nnet3/decode.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead_static

# Decode with runtime composition
steps/nnet3/decode_lookahead.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead

# Compile arpa graph
utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
    data/lang_test_${lm}_base ${am} data/local/lm/lm_tgmed.arpa.gz ${am}/graph_${lm}_lookahead_arpa

# Decode with runtime composition
steps/nnet3/decode_lookahead.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa

# Decode with runtime composition and tuned beams
steps/nnet3/decode_lookahead.sh --nj 20 \
    --beam 12.0 --max-active 3000 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_cleaned/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa_fast
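The graph-size numbers quoted in the comments at the top of this script come from the FSTs these commands write to disk; a rough way to reproduce them (directory names as created by this script):

# Single static graph vs. the two-part lookahead graph (HCL + grammar).
du -h exp/chain_cleaned/tdnn_1d_sp/graph_tgmed_lookahead_base/HCLG.fst
du -h exp/chain_cleaned/tdnn_1d_sp/graph_tgmed_lookahead/HCLr.fst \
      exp/chain_cleaned/tdnn_1d_sp/graph_tgmed_lookahead/Gr.fst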
Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
#!/bin/bash

. ./path.sh

# Example script for lookahead composition

lm=tgmed
am=exp/chain_online_cmn/tdnn1k_sp
testset=dev_clean_2

# %WER 10.32 [ 2078 / 20138, 201 ins, 275 del, 1602 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_base/wer_10_0.5
# %WER 10.29 [ 2073 / 20138, 200 ins, 272 del, 1601 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_static/wer_10_0.5
# %WER 10.25 [ 2064 / 20138, 192 ins, 277 del, 1595 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead/wer_10_0.5
# %WER 10.24 [ 2063 / 20138, 187 ins, 290 del, 1586 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_arpa/wer_10_0.5
# %WER 10.29 [ 2072 / 20138, 228 ins, 242 del, 1602 sub ] exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead_arpa_fast/wer_9_0.5

# Speed
#
# base       0.29 xRT
# static     0.31 xRT
# lookahead  0.77 xRT
# arpa       1.03 xRT
# arpa_fast  0.31 xRT

# Graph size
#
# Base                 461 Mb
# Static               587 Mb
# Lookahead            44 Mb HCL + 77 Mb Grammar
# Lookahead + OpenGrm  44 Mb HCL + 42 Mb Grammar

if [ ! -f "${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so" ]; then
    echo "Missing ${KALDI_ROOT}/tools/openfst/lib/libfstlookahead.so"
    echo "Make sure you compiled openfst with lookahead support. Run make in ${KALDI_ROOT}/tools after git pull."
    exit 1
fi
if [ ! -f "${KALDI_ROOT}/tools/openfst/bin/ngramread" ]; then
    echo "You appear to not have OpenGRM tools installed. Missing ${KALDI_ROOT}/tools/openfst/bin/ngramread"
    echo "cd to $KALDI_ROOT/tools and run extras/install_opengrm.sh."
    exit 1
fi
export LD_LIBRARY_PATH=${KALDI_ROOT}/tools/openfst/lib/fst

# Baseline
utils/format_lm.sh data/lang data/local/lm/lm_${lm}.arpa.gz \
    data/local/dict/lexicon.txt data/lang_test_${lm}_base

utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov \
    data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead_base

steps/nnet3/decode.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead_base data/${testset}_hires ${am}/decode_${testset}_lookahead_base

utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --remove-oov --compose-graph \
    data/lang_test_${lm}_base ${am} ${am}/graph_${lm}_lookahead

# Decode with statically composed lookahead graph
steps/nnet3/decode.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead_static

# Decode with runtime composition
steps/nnet3/decode_lookahead.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead data/${testset}_hires ${am}/decode_${testset}_lookahead

# Compile arpa graph
utils/mkgraph_lookahead.sh --self-loop-scale 1.0 --compose-graph \
    data/lang_test_${lm}_base ${am} data/local/lm/lm_tgmed.arpa.gz ${am}/graph_${lm}_lookahead_arpa

# Decode with runtime composition
steps/nnet3/decode_lookahead.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa

# Decode with runtime composition and tuned beams
steps/nnet3/decode_lookahead.sh --nj 20 \
    --beam 12.0 --max-active 3000 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_online_cmn/ivectors_${testset}_hires \
    ${am}/graph_${lm}_lookahead_arpa data/${testset}_hires ${am}/decode_${testset}_lookahead_arpa_fast
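Once the five decodes above have run, the WER lines quoted at the top of this script can be regenerated with the usual Kaldi helper (a sketch; utils/best_wer.sh is the standard scoring-summary script):

# Summarize the best WER for each lookahead decoding variant.
for d in exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead*; do
    grep WER "$d"/wer_* | utils/best_wer.sh
done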
Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,148 @@
#!/bin/bash

# Copyright 2019 Alpha Cephei Inc (Author: Nickolay Shmyrev).
# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey).
# Apache 2.0.

# This script does decoding with a neural net, using lookahead composition of the HCL and G graphs.

# Begin configuration section.
stage=1
nj=4                  # number of decoding jobs.
acwt=0.1              # Just a default value, used for adaptation and beam-pruning.
post_decode_acwt=1.0  # can be used in 'chain' systems to scale acoustics by 10 so the
                      # regular scoring script works.
cmd=run.pl
beam=15.0
frames_per_chunk=50
max_active=7000
min_active=200
ivector_scale=1.0
lattice_beam=8.0      # Beam we use in lattice generation.
iter=final
use_gpu=false         # If true, will use a GPU, with nnet3-latgen-faster-batch.
                      # In that case it is recommended to set num-threads to a large
                      # number, e.g. 20 if you have that many free CPU slots on a GPU
                      # node, and to use a small number of jobs.
scoring_opts=
skip_diagnostics=false
skip_scoring=false
extra_left_context=0
extra_right_context=0
extra_left_context_initial=-1
extra_right_context_final=-1
online_ivector_dir=
minimize=false
# End configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. utils/parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
  echo "e.g.:   $0 --nj 8 \\"
  echo "--online-ivector-dir exp/nnet2_online/ivectors_test_eval92 \\"
  echo "    exp/tri4b/graph_bg data/test_eval92_hires $dir/decode_bg_eval92"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>      # config containing options"
  echo "  --nj <nj>                   # number of parallel jobs"
  echo "  --cmd <cmd>                 # Command to run in parallel with"
  echo "  --beam <beam>               # Decoding beam; default 15.0"
  echo "  --iter <iter>               # Iteration of model to decode; default is final."
  echo "  --scoring-opts <string>     # options to local/score.sh"
  echo "  --num-threads <n>           # number of threads to use, default 1."
  echo "  --use-gpu <true|false>      # default: false.  If true, we recommend"
  echo "                              # to use large --num-threads as the graph"
  echo "                              # search becomes the limiting factor."
  exit 1;
fi

graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
model=$srcdir/$iter.mdl

extra_files=
if [ ! -z "$online_ivector_dir" ]; then
  steps/nnet2/check_ivectors_compatible.sh $srcdir $online_ivector_dir || exit 1
  extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
fi

#utils/lang/check_phones_compatible.sh {$srcdir,$graphdir}/phones.txt || exit 1

for f in $graphdir/HCLr.fst $graphdir/Gr.fst $graphdir/disambig_tid.int $data/feats.scp $model $extra_files; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

sdata=$data/split$nj;
cmvn_opts=`cat $srcdir/cmvn_opts` || exit 1;
thread_string=

mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs


## Set up features.
echo "$0: feature type is raw"

feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"

if [ ! -z "$online_ivector_dir" ]; then
  ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
  ivector_opts="--online-ivectors=scp:$online_ivector_dir/ivector_online.scp --online-ivector-period=$ivector_period"
fi

if [ "$post_decode_acwt" == 1.0 ]; then
  lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz"
else
  lat_wspecifier="ark:|lattice-scale --acoustic-scale=$post_decode_acwt ark:- ark:- | gzip -c >$dir/lat.JOB.gz"
fi

frame_subsampling_opt=
if [ -f $srcdir/frame_subsampling_factor ]; then
  # e.g. for 'chain' systems
  frame_subsampling_opt="--frame-subsampling-factor=$(cat $srcdir/frame_subsampling_factor)"
fi

if [ $stage -le 1 ]; then
  $cmd $queue_opt JOB=1:$nj $dir/log/decode.JOB.log \
    nnet3-latgen-faster-lookahead $ivector_opts $frame_subsampling_opt \
      --frames-per-chunk=$frames_per_chunk \
      --extra-left-context=$extra_left_context \
      --extra-right-context=$extra_right_context \
      --extra-left-context-initial=$extra_left_context_initial \
      --extra-right-context-final=$extra_right_context_final \
      --minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \
      --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true \
      --word-symbol-table=$graphdir/words.txt "$model" \
      $graphdir/HCLr.fst $graphdir/Gr.fst $graphdir/disambig_tid.int "$feats" "$lat_wspecifier" || exit 1;
fi


if [ $stage -le 2 ]; then
  if ! $skip_diagnostics ; then
    [ ! -z $iter ] && iter_opt="--iter $iter"
    steps/diagnostic/analyze_lats.sh --cmd "$cmd" $iter_opt $graphdir $dir
  fi
fi


# The output of this script is the files "lat.*.gz" -- we'll rescore this at
# different acoustic scales to get the final output.
if [ $stage -le 3 ]; then
  if ! $skip_scoring ; then
    [ ! -x local/score.sh ] && \
      echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
    echo "score best paths"
    [ "$iter" != "final" ] && iter_opt="--iter $iter"
    local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir
    echo "score confidence and timing with sclite"
  fi
fi
echo "Decoding done."
exit 0;
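The calling convention of this new decoding script mirrors steps/nnet3/decode.sh, except that the graph directory must contain HCLr.fst, Gr.fst and disambig_tid.int as produced by utils/mkgraph_lookahead.sh. For example, as used in the mini_librispeech script above:

steps/nnet3/decode_lookahead.sh --nj 20 \
    --acwt 1.0 --post-decode-acwt 10.0 \
    --online-ivector-dir exp/nnet3_online_cmn/ivectors_dev_clean_2_hires \
    exp/chain_online_cmn/tdnn1k_sp/graph_tgmed_lookahead \
    data/dev_clean_2_hires exp/chain_online_cmn/tdnn1k_sp/decode_dev_clean_2_lookahead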
