Skip to content

Commit 1dcdf80

Browse files
authored
[src] Fixes to grammar-fst code to handle LM-disambig symbols properly (kaldi-asr#3000)
thanks: [email protected]
1 parent 05d9a3d commit 1dcdf80

File tree

3 files changed

+132
-11
lines changed

3 files changed

+132
-11
lines changed

src/decoder/grammar-fst.cc

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,98 @@ void GrammarFst::Read(std::istream &is, bool binary) {
443443
}
444444

445445

446+
/**
447+
This utility function input-determinizes a specified state s of the FST
448+
'fst'. (This input-determinizes while treating epsilon as a real symbol,
449+
although for the application we expect to use it, there won't be epsilons).
450+
451+
What this function does is: for any symbol i that appears as the ilabel of
452+
more than one arc leaving state s of FST 'fst', it creates a new state t
453+
with epsilon-input transitions leaving it for
454+
each of those multiple arcs leaving state s; it deletes the original arcs
455+
leaving state s; and it creates a single arc leaving state s to the newly
456+
created state with the ilabel i on it. It sets the weights as necessary to
457+
preserve equivalence and also to ensure that if, prior to this modification,
458+
the FST was stochastic when cast to the log semiring (see
459+
IsStochasticInLog()), it still will be. I.e. when interpreted as
460+
negative logprobs, the weight from state s to t would be the sum of
461+
the weights on the original arcs leaving state s.
462+
463+
This is used as a very cheap solution when preparing FSTs for the grammar
464+
decoder, to ensure that there is only one entry-state to the sub-FST for each
465+
phonetic left-context; this keeps the grammar-FST code (i.e. the code that
466+
stitches them together) simple. Of course it will tend to introduce
467+
unnecessary epsilons, and if we were careful we might be able to remove
468+
some of those, but this wouldn't have a substantial impact on overall
469+
decoder performance so we don't bother.
470+
*/
471+
// Input-determinizes state 's' of '*fst' in place (epsilon is treated as an
// ordinary symbol).  For every ilabel that appears on more than one arc
// leaving 's', those arcs are replaced by a single arc from 's' to a newly
// added state, and the original arcs are re-created leaving that new state
// with ilabel 0.
//
//  @param [in] s        The state whose outgoing arcs are to be made
//                       input-deterministic.
//  @param [in,out] fst  The FST to modify; states may be added to it, and the
//                       arcs leaving 's' are rewritten.
static void InputDeterminizeSingleState(StdArc::StateId s,
                                        VectorFst<StdArc> *fst) {
  typedef StdArc Arc;
  typedef Arc::StateId StateId;
  typedef Arc::Label Label;
  typedef Arc::Weight Weight;

  struct InfoForIlabel {
    size_t num_arcs;    // number of arcs leaving state s with this ilabel.
    float tot_cost;     // total cost of all arcs leaving state s for this
                        // ilabel, summed as if they were negative log-probs.
    StateId new_state;  // state-id of new state, if any, that we have created
                        // to remove duplicate symbols with this ilabel.
    InfoForIlabel(): num_arcs(0), tot_cost(0.0), new_state(-1) { }
  };

  // Take a private copy of the arcs leaving s.  This way we never hold an
  // ArcIterator over *fst (or a reference to one of its arcs) while calling
  // mutating methods such as AddState() and AddArc() on the same FST below.
  std::vector<Arc> orig_arcs;
  orig_arcs.reserve(fst->NumArcs(s));
  for (ArcIterator<VectorFst<Arc> > aiter(*fst, s);
       !aiter.Done(); aiter.Next())
    orig_arcs.push_back(aiter.Value());

  // First pass: count the arcs per ilabel and accumulate their total cost.
  std::unordered_map<Label, InfoForIlabel> label_map;
  bool was_input_deterministic = true;
  for (size_t i = 0; i < orig_arcs.size(); i++) {
    const Arc &arc = orig_arcs[i];
    InfoForIlabel &info = label_map[arc.ilabel];
    if (info.num_arcs == 0) {
      info.tot_cost = arc.weight.Value();
    } else {
      // log-add of the probabilities, expressed on the costs.
      info.tot_cost = -kaldi::LogAdd(-info.tot_cost, -arc.weight.Value());
      was_input_deterministic = false;
    }
    info.num_arcs++;
  }

  if (was_input_deterministic)
    return;  // Nothing to do.

  // 'new_arcs' will contain the modified list of arcs leaving state s.
  std::vector<Arc> new_arcs;
  new_arcs.reserve(orig_arcs.size());
  for (size_t i = 0; i < orig_arcs.size(); i++) {
    const Arc &arc = orig_arcs[i];
    InfoForIlabel &info = label_map[arc.ilabel];
    if (info.num_arcs == 1) {
      new_arcs.push_back(arc);  // ilabel is unique: no changes needed.
      continue;
    }
    if (info.new_state < 0) {
      // First arc seen with this duplicated ilabel: create the intermediate
      // state and the single arc from 's' to it, carrying the total cost.
      info.new_state = fst->AddState();
      new_arcs.push_back(Arc(arc.ilabel, 0, Weight(info.tot_cost),
                             info.new_state));
    }
    // Add an arc from the new state to the original destination of this arc.
    // Its cost is the original cost minus the total already placed on the
    // arc from 's', so the cost of each complete path is unchanged.
    fst->AddArc(info.new_state,
                Arc(0, arc.olabel,
                    Weight(arc.weight.Value() - info.tot_cost),
                    arc.nextstate));
  }

  // Replace the arcs leaving s with the rewritten list.
  fst->DeleteArcs(s);
  for (size_t i = 0; i < new_arcs.size(); i++)
    fst->AddArc(s, new_arcs[i]);
}
536+
537+
446538
// This class contains the implementation of the function
447539
// PrepareForGrammarFst(), which is declared in grammar-fst.h.
448540
class GrammarFstPreparer {
@@ -475,6 +567,12 @@ class GrammarFstPreparer {
475567
// OK, state s is a special state.
476568
FixArcsToFinalStates(s);
477569
MaybeAddFinalProbToState(s);
570+
// The following ensures that the start-state of sub-FSTs only has
571+
// a single arc per left-context phone (the graph-building recipe can
572+
// end up creating more than one if there were disambiguation symbols,
573+
// e.g. for language model backoff).
574+
if (s == fst_->Start() && IsEntryState(s))
575+
InputDeterminizeSingleState(s, fst_);
478576
}
479577
}
480578
}
@@ -487,7 +585,7 @@ class GrammarFstPreparer {
487585

488586
// Returns true if state 's' has at least one arc coming out of it with a
489587
// special nonterminal-related ilabel on it (i.e. an ilabel >=
490-
// kNontermBigNumber)
588+
// kNontermBigNumber), and false otherwise.
491589
bool IsSpecialState(StateId s) const;
492590

493591
// This function verifies that state s does not currently have any
@@ -509,6 +607,10 @@ class GrammarFstPreparer {
509607
// modify this state (by adding input-epsilon arcs), and false otherwise.
510608
bool NeedEpsilons(StateId s) const;
511609

610+
// Returns true if state s (which is expected to be the start state, although we
611+
// don't check this) has arcs with nonterminal symbols #nonterm_begin.
612+
bool IsEntryState(StateId s) const;
613+
512614
// Fixes any final-prob-related problems with this state. The problem we aim
513615
// to fix is that there may be arcs with nonterminal symbol #nonterm_end which
514616
// transition from this state to a state with non-unit final prob. This
@@ -599,6 +701,24 @@ bool GrammarFstPreparer::IsSpecialState(StateId s) const {
599701
return false;
600702
}
601703

704+
// Returns true if at least one arc leaving state 's' has an ilabel whose
// decoded nonterminal part corresponds to #nonterm_begin, and false
// otherwise.  's' is expected to be the start state, but this is not checked.
bool GrammarFstPreparer::IsEntryState(StateId s) const {
  int32 big_number = kNontermBigNumber,
      encoding_multiple = GetEncodingMultiple(nonterm_phones_offset_);

  for (ArcIterator<FST> aiter(*fst_, s); !aiter.Done(); aiter.Next()) {
    const Arc &arc = aiter.Value();
    int32 nonterminal = (arc.ilabel - big_number) / encoding_multiple;
    // We check that at least one arc has a label with nonterminal equal to
    // #nonterm_begin... in fact they will all have this value if at least one
    // does, and this was checked in NeedEpsilons().
    // Note: NeedEpsilons() compares the decoded nonterminal against
    // GetPhoneSymbolFor(kNontermBegin), not against the raw enum value
    // kNontermBegin, so we must do the same here.
    if (nonterminal == GetPhoneSymbolFor(kNontermBegin))
      return true;
  }
  return false;
}
720+
721+
602722
bool GrammarFstPreparer::NeedEpsilons(StateId s) const {
603723

604724
// See the documentation for GetCategoryOfArc() for explanation of what these are.
@@ -647,7 +767,7 @@ bool GrammarFstPreparer::NeedEpsilons(StateId s) const {
647767
if (nonterminal == GetPhoneSymbolFor(kNontermBegin) &&
648768
s != fst_->Start()) {
649769
KALDI_ERR << "#nonterm_begin symbol is present but this is not the "
650-
"first arc. Did you do fstdeterminizestar while compiling?";
770+
"first state. Did you do fstdeterminizestar while compiling?";
651771
}
652772
if (nonterminal == GetPhoneSymbolFor(kNontermEnd)) {
653773
if (fst_->NumArcs(arc.nextstate) != 0 ||

src/decoder/grammar-fst.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,15 @@ class GrammarFst {
229229
an arc-index leaving a particular state in an FST (i.e. an index
230230
that we could use to Seek() to the matching arc).
231231
232-
@param [in] fst The FST we are looking for state-indexes for
233-
@param [in] entry_state The state in the FST-- must have arcs with
234-
ilabels decodable as (nonterminal_symbol, left_context_phone).
235-
Will either be the start state (if 'nonterminal_symbol'
236-
corresponds to #nonterm_begin), or an internal state
237-
(if 'nonterminal_symbol' corresponds to #nonterm_reenter).
238-
The arc-indexes of those arcs will be the values
239-
we set in 'phone_to_arc'
232+
@param [in] fst The FST that is being entered (or reentered)
233+
@param [in] entry_state The state in 'fst' which is being entered
234+
(or reentered); will be fst.Start() if it's being
235+
entered. It must have arcs with ilabels decodable as
236+
(nonterminal_symbol, left_context_phone). Will either be the
237+
start state (if 'nonterminal_symbol' corresponds to
238+
#nonterm_begin), or an internal state (if 'nonterminal_symbol'
239+
corresponds to #nonterm_reenter). The arc-indexes of those
240+
arcs will be the values we set in 'phone_to_arc'
240241
@param [in] nonterminal_symbol The index in phones.txt of the
241242
nonterminal symbol we expect to be encoded in the ilabels
242243
of the arcs leaving 'entry_state'. Will either correspond

src/doc/grammar.dox

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ Z_S 243
352352
The special symbols in CLG.fst will be as follows.
353353

354354
The following special symbols may appear in any CLG graph, top-level or not:
355-
- When any graph invokes a sub-graph, there will be n arc with an ilabel
355+
- When any graph invokes a sub-graph, there will be an arc with an ilabel
356356
(</code>\#nonterm:foo</code>, <em>left-context-phone</em>) representing the
357357
user-specified nonterminal and the actual left-context, which will be
358358
followed by arcs with ilabels of the form (</code>\#nonterm_reenter</code>,

0 commit comments

Comments
 (0)