Skip to content
This repository was archived by the owner on Mar 29, 2026. It is now read-only.

Commit 8307e65

Browse files
committed
fix(riva): keep one-shot interim segments when boundary is reliable
Address PR feedback on divergence handling by preserving single-update interim segments when they appear reliable (high stability or sentence-ending punctuation), while retaining age-based chain commits and continuation matching.

Tests:
- go test ./apps/sotto/internal/riva
- just ci-check
- nix build 'path:.#sotto'
1 parent 6476bcd commit 8307e65

File tree

4 files changed

+70
-14
lines changed

4 files changed

+70
-14
lines changed

apps/sotto/internal/riva/client.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,14 @@ type Stream struct {
4141

4242
recvDone chan struct{}
4343

44-
mu sync.Mutex
45-
segments []string // committed transcript segments (final results and sealed interim chains)
46-
lastInterim string
47-
lastInterimAge int
48-
recvErr error
49-
closedSend bool
50-
debugSinkJSON io.Writer
44+
mu sync.Mutex
45+
segments []string // committed transcript segments (final results and sealed interim chains)
46+
lastInterim string
47+
lastInterimAge int
48+
lastInterimStability float32
49+
recvErr error
50+
closedSend bool
51+
debugSinkJSON io.Writer
5152
}
5253

5354
// DialStream establishes a stream, sends config, and starts the receive loop.

apps/sotto/internal/riva/client_test.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,30 @@ func TestRecordResponseReplacesDivergentInterimWithoutPrecommit(t *testing.T) {
8282
require.Equal(t, []string{"second phrase"}, segments)
8383
}
8484

85+
func TestRecordResponseCommitsStableSingleInterimOnDivergence(t *testing.T) {
86+
s := &Stream{}
87+
88+
s.recordResponse(&asrpb.StreamingRecognizeResponse{
89+
Results: []*asrpb.StreamingRecognitionResult{{
90+
IsFinal: false,
91+
Stability: 0.95,
92+
Alternatives: []*asrpb.SpeechRecognitionAlternative{{Transcript: "first phrase"}},
93+
}},
94+
})
95+
96+
s.recordResponse(&asrpb.StreamingRecognizeResponse{
97+
Results: []*asrpb.StreamingRecognitionResult{{
98+
IsFinal: false,
99+
Stability: 0.20,
100+
Alternatives: []*asrpb.SpeechRecognitionAlternative{{Transcript: "second phrase"}},
101+
}},
102+
})
103+
104+
require.Equal(t, []string{"first phrase"}, s.segments)
105+
segments := collectSegments(s.segments, s.lastInterim)
106+
require.Equal(t, []string{"first phrase", "second phrase"}, segments)
107+
}
108+
85109
func TestRecordResponseCommitsInterimChainOnDivergence(t *testing.T) {
86110
s := &Stream{}
87111

@@ -220,9 +244,11 @@ func TestInterimHelpers(t *testing.T) {
220244
})
221245
}
222246

223-
require.False(t, shouldCommitInterimBoundary("", 5))
224-
require.False(t, shouldCommitInterimBoundary("first phrase", 1))
225-
require.True(t, shouldCommitInterimBoundary("first phrase", 2))
247+
require.False(t, shouldCommitInterimBoundary("", 5, 0.9))
248+
require.False(t, shouldCommitInterimBoundary("first phrase", 1, 0.1))
249+
require.True(t, shouldCommitInterimBoundary("first phrase", 2, 0.1))
250+
require.True(t, shouldCommitInterimBoundary("first phrase", 1, 0.9))
251+
require.True(t, shouldCommitInterimBoundary("done.", 1, 0.0))
226252
}
227253

228254
func TestDialStreamEndToEndWithDebugSinkAndSpeechContexts(t *testing.T) {

apps/sotto/internal/riva/stream_receive.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,21 +54,24 @@ func (s *Stream) recordResponse(resp *asrpb.StreamingRecognizeResponse) {
5454
s.segments = appendSegment(s.segments, transcript)
5555
s.lastInterim = ""
5656
s.lastInterimAge = 0
57+
s.lastInterimStability = 0
5758
continue
5859
}
5960

6061
if s.lastInterim != "" {
6162
if isInterimContinuation(s.lastInterim, transcript) {
6263
s.lastInterim = transcript
6364
s.lastInterimAge++
65+
s.lastInterimStability = result.GetStability()
6466
continue
6567
}
66-
if shouldCommitInterimBoundary(s.lastInterim, s.lastInterimAge) {
68+
if shouldCommitInterimBoundary(s.lastInterim, s.lastInterimAge, s.lastInterimStability) {
6769
s.segments = appendSegment(s.segments, s.lastInterim)
6870
}
6971
}
7072

7173
s.lastInterim = transcript
7274
s.lastInterimAge = 1
75+
s.lastInterimStability = result.GetStability()
7376
}
7477
}

apps/sotto/internal/riva/transcript_segments.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ package riva
22

33
import "strings"
44

5-
const minInterimChainUpdates = 2
5+
const (
6+
// minInterimChainUpdates is the number of chain updates at or above which
// shouldCommitInterimBoundary commits a divergent interim.
minInterimChainUpdates = 2
7+
// stableInterimBoundaryThreshold is the stability value at or above which
// shouldCommitInterimBoundary commits an interim even when its chain is
// shorter than minInterimChainUpdates.
stableInterimBoundaryThreshold = 0.85
8+
)
69

710
// collectSegments appends a valid trailing interim segment when needed.
811
func collectSegments(committedSegments []string, lastInterim string) []string {
@@ -77,8 +80,31 @@ func isInterimContinuation(previous string, current string) bool {
7780

7881
// shouldCommitInterimBoundary returns true when a divergent interim chain looks
7982
// established enough to preserve as a committed segment.
80-
func shouldCommitInterimBoundary(previous string, chainUpdates int) bool {
81-
return cleanSegment(previous) != "" && chainUpdates >= minInterimChainUpdates
83+
func shouldCommitInterimBoundary(previous string, chainUpdates int, stability float32) bool {
84+
previous = cleanSegment(previous)
85+
if previous == "" {
86+
return false
87+
}
88+
if chainUpdates >= minInterimChainUpdates {
89+
return true
90+
}
91+
if stability >= stableInterimBoundaryThreshold {
92+
return true
93+
}
94+
return endsWithSentencePunctuation(previous)
95+
}
96+
97+
// endsWithSentencePunctuation reports whether text, once surrounding
// whitespace is trimmed, ends with one of the ASCII terminators '.', '!' or
// '?'. Empty or whitespace-only text yields false.
func endsWithSentencePunctuation(text string) bool {
	trimmed := strings.TrimSpace(text)
	n := len(trimmed)
	if n == 0 {
		return false
	}
	// Only the final byte is inspected; a trailing multi-byte rune can never
	// match because UTF-8 continuation bytes are outside the ASCII range.
	return strings.IndexByte(".!?", trimmed[n-1]) >= 0
}
83109

84110
// commonPrefixWords counts shared leading words across two slices.

0 commit comments

Comments
 (0)