From f7ca5128126efda6ff1e4f1c0078e1879f909a85 Mon Sep 17 00:00:00 2001 From: Larissa Date: Thu, 30 Oct 2025 11:11:42 +0100 Subject: [PATCH 1/2] Add word-exit-penalty to TreeTimesyncBeamSearch --- .../TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc | 10 ++++++++++ .../TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh | 2 ++ 2 files changed, 12 insertions(+) diff --git a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc index e65299b1..8de0b06e 100644 --- a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc +++ b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc @@ -117,6 +117,11 @@ const Core::ParameterBool TreeTimesyncBeamSearch::paramCollapseRepeatedLabels( "Collapse repeated emission of the same label into one output. If false, every emission is treated like a new output.", false); +const Core::ParameterFloat TreeTimesyncBeamSearch::paramWordExitPenalty( + "word-exit-penalty", + "Constant score which is added at a word end.", + 0, 0); + const Core::ParameterBool TreeTimesyncBeamSearch::paramSentenceEndFallBack( "sentence-end-fall-back", "Allow for fallback solution if no active word-end hypothesis exists at the end of a segment.", @@ -139,6 +144,7 @@ TreeTimesyncBeamSearch::TreeTimesyncBeamSearch(Core::Configuration const& config maxWordEndBeamSize_(paramMaxWordEndBeamSize(config)), scoreThreshold_(paramScoreThreshold(config)), wordEndScoreThreshold_(paramWordEndScoreThreshold(config)), + wordExitPenalty_(paramWordExitPenalty(config)), cacheCleanupInterval_(paramCacheCleanupInterval(config)), useBlank_(), collapseRepeatedLabels_(paramCollapseRepeatedLabels(config)), @@ -463,6 +469,10 @@ bool TreeTimesyncBeamSearch::decodeStep() { Lm::Score lmScore = languageModel_->score(wordEndExtension.lmHistory, st); wordEndExtension.score += lmScore; wordEndExtension.lmScore = lmScore; + + // Add word exit penalty + wordEndExtension.score += wordExitPenalty_; + wordEndExtension.lmScore 
+= wordExitPenalty_; } extensions_.push_back(wordEndExtension); } diff --git a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh index 9305e089..b5871f07 100644 --- a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh +++ b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh @@ -48,6 +48,7 @@ public: static const Core::ParameterFloat paramScoreThreshold; static const Core::ParameterFloat paramWordEndScoreThreshold; static const Core::ParameterBool paramCollapseRepeatedLabels; + static const Core::ParameterFloat paramWordExitPenalty; static const Core::ParameterBool paramSentenceEndFallBack; static const Core::ParameterBool paramLogStepwiseStatistics; static const Core::ParameterBool paramCacheCleanupInterval; @@ -122,6 +123,7 @@ private: size_t maxWordEndBeamSize_; Score scoreThreshold_; Score wordEndScoreThreshold_; + Score wordExitPenalty_; Nn::LabelIndex blankLabelIndex_; size_t cacheCleanupInterval_; From 30eb2ae030b677338933fd93ad4b0cb016ba98ee Mon Sep 17 00:00:00 2001 From: Larissa Date: Mon, 3 Nov 2025 09:36:35 +0100 Subject: [PATCH 2/2] Make penalty part of the AM score and introduce separate penalty for silence --- .../TreeTimesyncBeamSearch.cc | 26 ++++++++++++++++--- .../TreeTimesyncBeamSearch.hh | 2 ++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc index 8de0b06e..dc9345f3 100644 --- a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc +++ b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.cc @@ -120,7 +120,12 @@ const Core::ParameterBool TreeTimesyncBeamSearch::paramCollapseRepeatedLabels( const Core::ParameterFloat TreeTimesyncBeamSearch::paramWordExitPenalty( "word-exit-penalty", "Constant score which is added at a word end.", - 0, 0); + 0); + +const Core::ParameterFloat 
TreeTimesyncBeamSearch::paramSilencePenalty( + "silence-penalty", + "Constant score which is added when predicting silence. If not set, it will be the same as word-exit-penalty.", + Core::Type<Score>::min); const Core::ParameterBool TreeTimesyncBeamSearch::paramSentenceEndFallBack( "sentence-end-fall-back", @@ -145,6 +150,7 @@ TreeTimesyncBeamSearch::TreeTimesyncBeamSearch(Core::Configuration const& config scoreThreshold_(paramScoreThreshold(config)), wordEndScoreThreshold_(paramWordEndScoreThreshold(config)), wordExitPenalty_(paramWordExitPenalty(config)), + silencePenalty_(paramSilencePenalty(config)), cacheCleanupInterval_(paramCacheCleanupInterval(config)), useBlank_(), collapseRepeatedLabels_(paramCollapseRepeatedLabels(config)), @@ -177,6 +183,10 @@ TreeTimesyncBeamSearch::TreeTimesyncBeamSearch(Core::Configuration const& config error() << "Word-end score-threshold which is relative to the score-threshold is set, but score-threshold is not set"; } wordEndScoreThreshold_ *= scoreThreshold_; + + if (silencePenalty_ == Core::Type<Score>::min) { + silencePenalty_ = wordExitPenalty_; + } } Speech::ModelCombination::Mode TreeTimesyncBeamSearch::requiredModelCombination() const { @@ -470,10 +480,18 @@ bool TreeTimesyncBeamSearch::decodeStep() { wordEndExtension.score += lmScore; wordEndExtension.lmScore = lmScore; - // Add word exit penalty - wordEndExtension.score += wordExitPenalty_; - wordEndExtension.lmScore += wordExitPenalty_; + // Add exit penalty for silence or for non-silence word + if (lemma == lexicon_->specialLemma("silence")) { + wordEndExtension.score += silencePenalty_; + } + else { + wordEndExtension.score += wordExitPenalty_; + } } + else if (lemma == lexicon_->specialLemma("silence")) { + wordEndExtension.score += silencePenalty_; + } + extensions_.push_back(wordEndExtension); } } diff --git a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh index b5871f07..bbb347b6 100644 --- 
a/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh +++ b/src/Search/TreeTimesyncBeamSearch/TreeTimesyncBeamSearch.hh @@ -49,6 +49,7 @@ public: static const Core::ParameterFloat paramWordEndScoreThreshold; static const Core::ParameterBool paramCollapseRepeatedLabels; static const Core::ParameterFloat paramWordExitPenalty; + static const Core::ParameterFloat paramSilencePenalty; static const Core::ParameterBool paramSentenceEndFallBack; static const Core::ParameterBool paramLogStepwiseStatistics; static const Core::ParameterBool paramCacheCleanupInterval; @@ -124,6 +125,7 @@ private: Score scoreThreshold_; Score wordEndScoreThreshold_; Score wordExitPenalty_; + Score silencePenalty_; Nn::LabelIndex blankLabelIndex_; size_t cacheCleanupInterval_;