Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Local-first speech-to-text CLI.
- single-instance command coordination via unix socket
- audio capture via PipeWire/Pulse
- streaming ASR via NVIDIA Riva gRPC
- transcript normalization + optional trailing space
- transcript normalization + sentence capitalization + optional trailing space
- output adapters:
- clipboard command (`clipboard_cmd`)
- optional paste command override (`paste_cmd`)
Expand Down
5 changes: 4 additions & 1 deletion apps/sotto/internal/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ func Default() Config {
LanguageCode: "en-US",
Model: "",
},
Transcript: TranscriptConfig{TrailingSpace: true},
Transcript: TranscriptConfig{
TrailingSpace: true,
CapitalizeSentences: true,
},
Indicator: IndicatorConfig{
Enable: true,
Backend: "hypr",
Expand Down
12 changes: 9 additions & 3 deletions apps/sotto/internal/config/parser_jsonc.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ type jsoncASR struct {
}

type jsoncTranscript struct {
TrailingSpace *bool `json:"trailing_space"`
TrailingSpace *bool `json:"trailing_space"`
CapitalizeSentences *bool `json:"capitalize_sentences"`
}

type jsoncIndicator struct {
Expand Down Expand Up @@ -176,8 +177,13 @@ func (payload jsoncConfig) applyTo(cfg *Config) ([]Warning, error) {
}
}

if payload.Transcript != nil && payload.Transcript.TrailingSpace != nil {
cfg.Transcript.TrailingSpace = *payload.Transcript.TrailingSpace
if payload.Transcript != nil {
if payload.Transcript.TrailingSpace != nil {
cfg.Transcript.TrailingSpace = *payload.Transcript.TrailingSpace
}
if payload.Transcript.CapitalizeSentences != nil {
cfg.Transcript.CapitalizeSentences = *payload.Transcript.CapitalizeSentences
}
}

if payload.Indicator != nil {
Expand Down
6 changes: 6 additions & 0 deletions apps/sotto/internal/config/parser_legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,12 @@ func applyRootKey(cfg *Config, key, value string) error {
return fmt.Errorf("invalid bool for transcript.trailing_space: %w", err)
}
cfg.Transcript.TrailingSpace = b
case "transcript.capitalize_sentences":
b, err := strconv.ParseBool(value)
if err != nil {
return fmt.Errorf("invalid bool for transcript.capitalize_sentences: %w", err)
}
cfg.Transcript.CapitalizeSentences = b
case "indicator.enable":
b, err := strconv.ParseBool(value)
if err != nil {
Expand Down
12 changes: 12 additions & 0 deletions apps/sotto/internal/config/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,18 @@ func TestParsePasteShortcut(t *testing.T) {
}
}

// TestParseTranscriptCapitalizeSentencesJSONC checks that a JSONC document can
// switch transcript.capitalize_sentences off when parsed on top of Default().
func TestParseTranscriptCapitalizeSentencesJSONC(t *testing.T) {
	const input = `{"transcript":{"capitalize_sentences":false}}`

	parsed, _, parseErr := Parse(input, Default())

	require.NoError(t, parseErr)
	require.False(t, parsed.Transcript.CapitalizeSentences)
}

// TestParseTranscriptCapitalizeSentencesLegacy checks that the legacy
// "key = value" syntax can switch transcript.capitalize_sentences off when
// parsed on top of Default().
func TestParseTranscriptCapitalizeSentencesLegacy(t *testing.T) {
	const input = "transcript.capitalize_sentences = false\n"

	parsed, _, parseErr := Parse(input, Default())

	require.NoError(t, parseErr)
	require.False(t, parsed.Transcript.CapitalizeSentences)
}

func TestParseIndicatorBackend(t *testing.T) {
cfg, _, err := Parse(`
{
Expand Down
3 changes: 2 additions & 1 deletion apps/sotto/internal/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ type ASRConfig struct {

// TranscriptConfig controls transcript assembly formatting.
type TranscriptConfig struct {
TrailingSpace bool
TrailingSpace bool
CapitalizeSentences bool
}

// IndicatorConfig controls visual indicator and audio cue behavior.
Expand Down
5 changes: 4 additions & 1 deletion apps/sotto/internal/pipeline/transcriber.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,10 @@ func (t *Transcriber) StopAndTranscribe(ctx context.Context) (session.StopResult
return result, fmt.Errorf("collect final transcript: %w", err)
}

transcribed := transcript.Assemble(segments, t.cfg.Transcript.TrailingSpace)
transcribed := transcript.Assemble(segments, transcript.Options{
TrailingSpace: t.cfg.Transcript.TrailingSpace,
CapitalizeSentences: t.cfg.Transcript.CapitalizeSentences,
})
rawPCM := capture.RawPCM()
t.writeDebugAudio(rawPCM)
t.closeDebugArtifacts()
Expand Down
2 changes: 1 addition & 1 deletion apps/sotto/internal/pipeline/transcriber_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ func TestStopAndTranscribeSuccessPath(t *testing.T) {

result, err := transcriber.StopAndTranscribe(context.Background())
require.NoError(t, err)
require.Equal(t, "hello world ", result.Transcript)
require.Equal(t, "Hello world ", result.Transcript)
require.Equal(t, "Mic (mic-1)", result.AudioDevice)
require.Equal(t, int64(4096), result.BytesCaptured)
require.Equal(t, 12*time.Millisecond, result.GRPCLatency)
Expand Down
93 changes: 89 additions & 4 deletions apps/sotto/internal/transcript/assemble.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
// Package transcript assembles and normalizes recognized ASR segments.
package transcript

import "strings"
import (
"regexp"
"strings"
"unicode"
)

// Assemble joins final ASR segments and applies whitespace/trailing-space normalization.
func Assemble(finalSegments []string, trailingSpace bool) string {
// Options controls transcript assembly formatting behavior.
type Options struct {
	// TrailingSpace makes Assemble append a single space to the assembled
	// transcript.
	TrailingSpace bool
	// CapitalizeSentences makes Assemble run the sentence-capitalization pass
	// (sentence starts and the pronoun "I") over the normalized transcript.
	CapitalizeSentences bool
}

var (
	// pronounIContractionPattern matches a lowercase "i" that stands alone as a
	// word and is followed by a straight or curly apostrophe plus one of the
	// suffixes m, d, ll, ve, re or s (for example "i'm" or "i’ll").
	// Note: \b in Go's regexp syntax is an ASCII word boundary.
	pronounIContractionPattern = regexp.MustCompile(`\bi['’](?:m|d|ll|ve|re|s)\b`)
	// pronounIWordPattern matches a lowercase "i" that stands alone as a word.
	pronounIWordPattern = regexp.MustCompile(`\bi\b`)
)

// Assemble joins final ASR segments and applies configured normalization.
func Assemble(finalSegments []string, opts Options) string {
if len(finalSegments) == 0 {
return ""
}
Expand All @@ -15,8 +30,78 @@ func Assemble(finalSegments []string, trailingSpace bool) string {
return ""
}

if trailingSpace {
if opts.CapitalizeSentences {
normalized = capitalizeSentences(normalized)
}

if opts.TrailingSpace {
return normalized + " "
}
return normalized
}

// capitalizeSentences applies sentence casing to text: it upper-cases sentence
// starts first, then promotes the pronoun "i" to "I", both inside the
// contractions matched by pronounIContractionPattern and as a standalone word.
func capitalizeSentences(text string) string {
	sentenceCased := capitalizeSentenceStarts(text)

	// Every contraction match begins with the single-byte ASCII "i", so
	// swapping the first byte is enough to upper-case the pronoun.
	promoteContraction := func(contraction string) string {
		return "I" + contraction[1:]
	}
	withContractions := pronounIContractionPattern.ReplaceAllStringFunc(sentenceCased, promoteContraction)

	return pronounIWordPattern.ReplaceAllString(withContractions, "I")
}

// capitalizeSentenceStarts upper-cases the first letter of text and the first
// letter that follows each sentence terminator ('.', '!' or '?').
//
// A terminator only starts a new sentence when whitespace appears between it
// and the next letter, so fragments such as "example.com" or "v2.1" are left
// untouched. Closing quotes/brackets directly after the terminator, and any
// other punctuation that comes after the whitespace, are skipped while waiting
// for that letter. A digit reached before any letter cancels the pending
// capitalization, both after a terminator and at the very start of text, so a
// leading ordinal such as "2nd" is not rewritten to "2Nd".
func capitalizeSentenceStarts(text string) string {
	var out strings.Builder
	out.Grow(len(text))

	// capitalizeStart is armed until the first letter or digit of text.
	capitalizeStart := true
	// pendingBoundary is set after a terminator until the next sentence
	// either starts or is ruled out.
	pendingBoundary := false
	sawWhitespaceAfterBoundary := false

	for _, r := range text {
		switch {
		case capitalizeStart && unicode.IsLetter(r):
			r = unicode.ToUpper(r)
			capitalizeStart = false
			pendingBoundary = false
			sawWhitespaceAfterBoundary = false
		case capitalizeStart && unicode.IsDigit(r):
			// The text opens with a numeral: treat it as the start of the
			// sentence so the letters that follow keep their case, matching
			// how a digit after a terminator is handled below.
			capitalizeStart = false
			pendingBoundary = false
			sawWhitespaceAfterBoundary = false
		case pendingBoundary:
			switch {
			case unicode.IsSpace(r):
				sawWhitespaceAfterBoundary = true
			case unicode.IsLetter(r):
				// Only a letter separated from the terminator by whitespace
				// begins a new sentence ("a. b" yes, "a.b" no).
				if sawWhitespaceAfterBoundary {
					r = unicode.ToUpper(r)
				}
				pendingBoundary = false
				sawWhitespaceAfterBoundary = false
			case unicode.IsDigit(r):
				pendingBoundary = false
				sawWhitespaceAfterBoundary = false
			case isSentencePrefixRune(r):
				// Keep waiting for a letter. This supports punctuation like: . "quote"
			default:
				// Other punctuation glued to the terminator cancels the
				// boundary; after whitespace it is skipped instead.
				if !sawWhitespaceAfterBoundary {
					pendingBoundary = false
					sawWhitespaceAfterBoundary = false
				}
			}
		}

		out.WriteRune(r)

		switch r {
		case '.', '!', '?':
			pendingBoundary = true
			sawWhitespaceAfterBoundary = false
		}
	}

	return out.String()
}

// isSentencePrefixRune reports whether r is one of the closing brackets or
// quotation marks that may sit between a sentence terminator and the first
// letter of the next sentence.
func isSentencePrefixRune(r rune) bool {
	// Closing brackets plus straight and curly closing quotes.
	const prefixRunes = `)]}'"’”`
	return strings.ContainsRune(prefixRunes, r)
}
73 changes: 64 additions & 9 deletions apps/sotto/internal/transcript/assemble_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,92 @@ import (
"github.com/stretchr/testify/require"
)

func TestAssembleNormalizesWhitespaceAndTrailingSpace(t *testing.T) {
func TestAssembleNormalizesWhitespaceTrailingSpaceAndSentenceCase(t *testing.T) {
t.Parallel()

got := Assemble([]string{" hello", "world ", "\nfrom", "sotto"}, true)
require.Equal(t, "hello world from sotto ", got)
got := Assemble([]string{" hello", "world.", "\nfrom", "sotto"}, Options{
TrailingSpace: true,
CapitalizeSentences: true,
})
require.Equal(t, "Hello world. From sotto ", got)
}

func TestAssembleWithoutTrailingSpace(t *testing.T) {
t.Parallel()

got := Assemble([]string{"hello", "world"}, false)
got := Assemble([]string{"hello", "world"}, Options{
TrailingSpace: false,
CapitalizeSentences: false,
})
require.Equal(t, "hello world", got)
}

func TestAssembleEmptyInput(t *testing.T) {
t.Parallel()

require.Empty(t, Assemble(nil, true))
require.Empty(t, Assemble(nil, Options{TrailingSpace: true, CapitalizeSentences: true}))
}

func TestAssembleSkipsWhitespaceOnlySegments(t *testing.T) {
t.Parallel()

got := Assemble([]string{" ", "\n\t", "hello"}, false)
require.Equal(t, "hello", got)
got := Assemble([]string{" ", "\n\t", "hello"}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
require.Equal(t, "Hello", got)
}

func TestAssembleSentenceCaseCapitalizesPronounI(t *testing.T) {
t.Parallel()

got := Assemble([]string{"when i speak i'm clearer. i think i will keep using it."}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
require.Equal(t, "When I speak I'm clearer. I think I will keep using it.", got)
}

func TestAssembleSentenceCaseDoesNotCapitalizeDomainOrDecimalFragments(t *testing.T) {
t.Parallel()

got := Assemble([]string{"check example.com and v2.1 first. then reply"}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
require.Equal(t, "Check example.com and v2.1 first. Then reply", got)
}

func TestAssembleSentenceCaseHandlesQuoteAfterBoundary(t *testing.T) {
t.Parallel()

got := Assemble([]string{"he said. \"hello there\" and left."}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
require.Equal(t, "He said. \"Hello there\" and left.", got)
}

func TestAssembleSentenceCaseLeadingBoundaryDoesNotDoubleCapitalize(t *testing.T) {
t.Parallel()

got := Assemble([]string{"2. hello there"}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
require.Equal(t, "2. Hello there", got)
}

func TestAssembleIdempotentForNormalizedOutput(t *testing.T) {
t.Parallel()

first := Assemble([]string{"hello", "world"}, false)
second := Assemble([]string{first}, false)
first := Assemble([]string{"hello world. this is sotto"}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
second := Assemble([]string{first}, Options{
TrailingSpace: false,
CapitalizeSentences: true,
})
require.Equal(t, first, second)
}
2 changes: 1 addition & 1 deletion docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ flowchart LR

Session --> Audio["Audio capture\n(PipeWire/Pulse)"]
Session --> ASR["Riva streaming client\n(gRPC)"]
ASR --> Transcript["Transcript assembly\n(normalize + trailing space)"]
ASR --> Transcript["Transcript assembly\n(normalize + sentence case + trailing space)"]
Transcript --> Output["Output adapters\n(clipboard + paste)"]

Session --> Indicator["Indicator adapters\n(hypr or desktop) + cues"]
Expand Down
4 changes: 3 additions & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Top-level object keys:
| Key | Default | Notes |
| --- | --- | --- |
| `transcript.trailing_space` | `true` | append space after assembled transcript |
| `transcript.capitalize_sentences` | `true` | sentence-case output and promote standalone `i`/`i'm` to `I`/`I'm` |

### `indicator`

Expand Down Expand Up @@ -153,7 +154,8 @@ default-timeout=0
},

"transcript": {
"trailing_space": true
"trailing_space": true,
"capitalize_sentences": true
},

"indicator": {
Expand Down
Loading