Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
59b3c47
feat(go bindings): add state abstraction
ciricc Sep 13, 2025
ebbcf3f
refactor(go bindings): make thread-safe and stateful context
ciricc Sep 13, 2025
97e6ce2
feat(go bindings): add VAD and Diarization parameters
ciricc Sep 13, 2025
2f16f80
refactor(go bindings): add diarization unit tests
ciricc Sep 13, 2025
01f5c6b
refactor(go bindings): remove public method accessing unsafe whisper
ciricc Sep 13, 2025
ba990ab
refactor(go bindings): fix backward compatibility for error andling l…
ciricc Sep 13, 2025
221e93a
refactor(go bindings): fix backward compatibility for the interfaces
ciricc Sep 13, 2025
b751ec1
fix(go bindings): unit tests and exported interfaces
ciricc Sep 13, 2025
9972297
refactor(go bindings): remove public Parameters from context
ciricc Sep 13, 2025
f943c8f
refactor(go bindings): remove public Model() method
ciricc Sep 13, 2025
125ea61
refactor(go bindings): remove new methods from the Model interface
ciricc Sep 13, 2025
8f9ad60
refactor(go bindings): make ModelContext and StatefulContext public
ciricc Sep 14, 2025
2305a61
feat: split context into stateful/stateless; add concurrency gate, ad…
ciricc Sep 20, 2025
bf72a5b
doc: added README for how-to with the new contexts, removed SetGate p…
ciricc Sep 20, 2025
feaba63
chore: remove pkg/mod
ciricc Sep 20, 2025
334f40b
chor: remove pkg/sumdb
ciricc Sep 20, 2025
50caee3
doc: benchmark step by step doc
ciricc Sep 20, 2025
91f5cc1
refactor: added segment formatiing, fixed benchmark printings
ciricc Sep 20, 2025
5711a71
fix: fix sampling type
ciricc Sep 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bindings/go/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
build
models
samples/a13.wav
samples/benchmark_out.wav
18 changes: 18 additions & 0 deletions bindings/go/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,19 @@ endif

examples: $(EXAMPLES_DIR)

# Run the pkg/whisper benchmarks (downloads the small.en model first).
# `-run '^$$'` matches no unit tests, so only the benchmarks selected by
# `-bench` execute; without `-bench` the command would run nothing at all.
benchmark: model-small whisper modtidy
ifeq ($(UNAME_S),Darwin)
	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -bench=BenchmarkContextProcess -benchmem -run '^$$' ./pkg/whisper/...
else
	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -bench=BenchmarkContextProcess -benchmem -run '^$$' ./pkg/whisper/...
endif

# Download ggml-small.en.bin into ./models with the go-model-download example binary.
model-small: mkdir examples/go-model-download
@${BUILD_DIR}/go-model-download -out models ggml-small.en.bin

# Download ggml-small.en-tdrz.bin (the tinydiarize variant) into ./models.
model-small-tdrz: mkdir examples/go-model-download
@${BUILD_DIR}/go-model-download -out models ggml-small.en-tdrz.bin

$(EXAMPLES_DIR): mkdir whisper modtidy
@echo Build example $(notdir $@)
ifeq ($(UNAME_S),Darwin)
Expand All @@ -57,6 +67,14 @@ else
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
endif

# Fetch the Apollo 13 clip from Wikimedia Commons and convert it with ffmpeg to
# samples/a13.wav: 16 kHz, mono, 16-bit PCM, trimmed to the first 30 seconds.
# The intermediate MP3 is removed afterwards. Requires wget and ffmpeg on PATH.
.PHONY: samples
samples:
@echo "Downloading samples..."
@mkdir -p samples
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
@ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
@rm samples/a13.mp3

mkdir:
@echo Mkdir ${BUILD_DIR}
@install -d ${BUILD_DIR}
Expand Down
125 changes: 123 additions & 2 deletions bindings/go/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ This package provides Go bindings for whisper.cpp. They have been tested on:
* Fedora Linux on x86_64

The "low level" bindings are in the `bindings/go` directory and there is a more
Go-style package in the `bindings/go/pkg/whisper` directory. The most simple usage
is as follows:
Go-style package in the `bindings/go/pkg/whisper` directory.

Legacy stateless example (single worker). For the recommended stateful API and
concurrency-safe usage, see "New high-level API" below. Note: `Model.NewContext()`
returns a stateless context for backward compatibility and is not safe for parallel
`Process` calls (may return `ErrStatelessBusy`).

```go
import (
Expand Down Expand Up @@ -100,6 +104,123 @@ Getting help:

* Follow the discussion for the go bindings [here](https://github.com/ggml-org/whisper.cpp/discussions/312)

## New high-level API (stateful and stateless contexts)

The `pkg/whisper` package now exposes two context kinds:

- StatefulContext: recommended for concurrency. Each context owns its own whisper_state.
- StatelessContext: shares the model context. Simpler, but not suitable for parallel `Process` calls.

### Quick start: stateful context (recommended)

```go
package main

import (
"fmt"
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
)

func main() {
// Load model
model, err := whisper.NewModelContext("./models/ggml-small.en.bin")
if err != nil {
panic(err)
}
defer model.Close()

// Configure parameters (optional: provide a config func)
params, err := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, func(p *whisper.Parameters) {
p.SetThreads(4)
p.SetLanguage("en") // or "auto"
p.SetTranslate(false)
})
if err != nil {
panic(err)
}

// Create stateful context (safe for running in parallel goroutines)
ctx, err := whisper.NewStatefulContext(model, params)
if err != nil {
panic(err)
}
defer ctx.Close()

// Your 16-bit mono PCM at 16kHz as float32 samples
var samples []float32

// Process. Callbacks are optional.
if err := ctx.Process(samples, nil, nil, nil); err != nil {
panic(err)
}

// Read segments
for {
seg, err := ctx.NextSegment()
if err != nil {
break
}
fmt.Printf("[%v -> %v] %s\n", seg.Start, seg.End, seg.Text)
}
}
```

### Quick start: stateless context (single worker)

```go
// Load model as above
model, _ := whisper.NewModelContext("./models/ggml-small.en.bin")
defer model.Close()

params, _ := whisper.NewParameters(model, whisper.SAMPLING_GREEDY, nil)
ctx, _ := whisper.NewStatelessContext(model, params)
defer ctx.Close()

if err := ctx.Process(samples, nil, nil, nil); err != nil { panic(err) }
for {
seg, err := ctx.NextSegment()
if err != nil { break }
fmt.Println(seg.Text)
}
```

### Deprecations and migration notes

- The `Context` interface setters are deprecated (SetThreads, SetLanguage, etc.). Use `Parameters` via `NewParameters` and pass it when creating a context.
- `Model.NewContext()` remains for backward compatibility and returns a stateless context by default. Prefer `NewStatefulContext` for concurrency.
- Stateless contexts share the model context. A concurrency gate prevents overlapping `Process` calls and will return `ErrStatelessBusy` if another `Process` is in flight.
- For parallel processing, create one `StatefulContext` per goroutine.

## Benchmarks

Benchmarks live in `pkg/whisper` and compare CPU vs GPU, stateful vs stateless, threads, and callback modes.

### Prerequisites

- Model: `models/ggml-small.en.bin` (or your choice).
- Sample: `samples/jfk.wav`.
- Build the C libs once (also downloads a model for examples):

```bash
cd bindings/go
make examples
# optionally: ./build/go-model-download -out models
```

### Run benchmarks

```bash
cd bindings/go
make benchmark
```

### What the benchmarks measure

- Variants: device (cpu/gpu) x context kind (stateless/stateful) x threads {1,2,4, NumCPU} x callback mode (NoCallback, WithSegmentCallback).
- Standard Go benchmark outputs: ns/op, B/op, allocs/op. We also set bytes per op to sample bytes.
- Custom metric `ms_process`: wall time per `Process` iteration, reported via `b.ReportMetric`.
- When `printTimings` is enabled, model-level timings are printed for NoCallback runs using `model.PrintTimings()`.

## License

The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
Expand Down
14 changes: 11 additions & 3 deletions bindings/go/examples/go-model-download/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ import (
// CONSTANTS

const (
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
srcExt = ".bin" // Filename extension
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
srcUrlTinydiarize = "https://huggingface.co/akashmjn/tinydiarize-whisper.cpp/resolve/main/" // Base URL used instead of srcUrl when the model name contains "tdrz"
srcExt = ".bin" // Filename extension
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
)

var (
Expand All @@ -38,6 +39,7 @@ var (
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
"large-v3", "large-v3-q5_0",
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
"small.en-tdrz",
}
)

Expand Down Expand Up @@ -219,6 +221,12 @@ func URLForModel(model string) (string, error) {
model += srcExt
}

srcUrl := srcUrl

if strings.Contains(model, "tdrz") {
srcUrl = srcUrlTinydiarize
}

// Parse the base URL
url, err := url.Parse(srcUrl)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion bindings/go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ module github.com/ggerganov/whisper.cpp/bindings/go
go 1.23

require (
github.com/go-audio/audio v1.0.0
github.com/go-audio/wav v1.1.0
github.com/stretchr/testify v1.9.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-audio/audio v1.0.0 // indirect
github.com/go-audio/riff v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
43 changes: 43 additions & 0 deletions bindings/go/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,49 @@ func (p *Params) SetPrintTimestamps(v bool) {
p.print_timestamps = toBool(v)
}

// SetDebugMode switches the extra debug information on or off by writing the
// debug_mode field of the underlying parameter struct.
func (p *Params) SetDebugMode(v bool) {
	enabled := toBool(v)
	p.debug_mode = enabled
}

// SetDiarize switches tinydiarize speaker-turn detection on or off by writing
// the tdrz_enable field of the underlying parameter struct.
func (p *Params) SetDiarize(v bool) {
	enabled := toBool(v)
	p.tdrz_enable = enabled
}

// SetVAD switches Voice Activity Detection (VAD) on or off by writing the vad
// field of the underlying parameter struct.
func (p *Params) SetVAD(v bool) {
	enabled := toBool(v)
	p.vad = enabled
}

// SetVADModelPath sets the path of the VAD model. The Go string is copied into
// a newly allocated C string via C.CString and stored in vad_model_path.
//
// NOTE(review): nothing in this method frees the new C string or the pointer
// it replaces, so repeated calls appear to leak C memory. Confirm who owns
// vad_model_path and where it is expected to be freed.
func (p *Params) SetVADModelPath(path string) {
	cPath := C.CString(path)
	p.vad_model_path = cPath
}

// SetVADThreshold stores t in vad_params.threshold.
// NOTE(review): presumably the speech-probability threshold used by the VAD;
// confirm the valid range against whisper.h.
func (p *Params) SetVADThreshold(t float32) {
	threshold := C.float(t)
	p.vad_params.threshold = threshold
}

// SetVADMinSpeechMs stores ms in vad_params.min_speech_duration_ms
// (milliseconds, going by the field name).
func (p *Params) SetVADMinSpeechMs(ms int) {
	duration := C.int(ms)
	p.vad_params.min_speech_duration_ms = duration
}

// SetVADMinSilenceMs stores ms in vad_params.min_silence_duration_ms
// (milliseconds, going by the field name).
func (p *Params) SetVADMinSilenceMs(ms int) {
	duration := C.int(ms)
	p.vad_params.min_silence_duration_ms = duration
}

// SetVADMaxSpeechSec stores s in vad_params.max_speech_duration_s
// (seconds, going by the field name).
func (p *Params) SetVADMaxSpeechSec(s float32) {
	duration := C.float(s)
	p.vad_params.max_speech_duration_s = duration
}

// SetVADSpeechPadMs stores ms in vad_params.speech_pad_ms
// (milliseconds, going by the field name).
func (p *Params) SetVADSpeechPadMs(ms int) {
	padding := C.int(ms)
	p.vad_params.speech_pad_ms = padding
}

// SetVADSamplesOverlap stores sec in vad_params.samples_overlap.
// NOTE(review): the parameter name suggests the overlap is expressed in
// seconds; confirm against whisper.h.
func (p *Params) SetVADSamplesOverlap(sec float32) {
	overlap := C.float(sec)
	p.vad_params.samples_overlap = overlap
}

// Set language id
func (p *Params) SetLanguage(lang int) error {
if lang == -1 {
Expand Down
58 changes: 58 additions & 0 deletions bindings/go/pkg/whisper/concurrency_gate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package whisper

import (
"sync"
"sync/atomic"

// Bindings
whisper "github.com/ggerganov/whisper.cpp/bindings/go"
)

// Gate is a keyed acquire/release contract: a caller asks for a key, and
// gives it back when done.
//
// The package default is a single-entry lock per key (limit=1).
type Gate interface {
	// Acquire reports whether the caller obtained the key. A false result
	// means the key is already held.
	Acquire(key any) bool

	// Release gives the key back if it is currently held.
	Release(key any)
}

// singleFlightGate is the default Gate: at most one holder per key (limit=1).
//
// Every key maps to a flag that is flipped with compare-and-swap, so Acquire
// never blocks. Flags stay in the map after Release so they can be reused by
// later Acquire calls on the same key.
type singleFlightGate struct {
	m sync.Map // key -> *atomic.Bool (false available, true held)
}

// Acquire tries to take the key without blocking. It returns true when the
// caller now holds the key and false when the key is already held.
func (g *singleFlightGate) Acquire(key any) bool {
	// Fast path: a key that has been seen before already has a flag, so look
	// it up first instead of allocating a throwaway flag for LoadOrStore on
	// every call.
	v, ok := g.m.Load(key)
	if !ok {
		v, _ = g.m.LoadOrStore(key, new(atomic.Bool))
	}
	return v.(*atomic.Bool).CompareAndSwap(false, true)
}

// Release marks the key as available again. Releasing a key that was never
// acquired is a no-op.
func (g *singleFlightGate) Release(key any) {
	if v, ok := g.m.Load(key); ok {
		v.(*atomic.Bool).Store(false)
	}
}

var (
	// gateMu guards defaultGate so SetGate may be called while other
	// goroutines are fetching the gate.
	gateMu sync.RWMutex

	// defaultGate is the package-wide gate returned by gate(). Access it
	// through SetGate/gate rather than directly.
	defaultGate Gate = &singleFlightGate{}
)

// SetGate allows applications to override the default gate (e.g., for custom
// policies). Passing nil resets to a fresh singleFlightGate.
//
// NOTE(review): keys held in the previously installed gate are not carried
// over to the new one; presumably callers should swap gates only while no
// processing is in flight.
func SetGate(g Gate) {
	if g == nil {
		g = &singleFlightGate{}
	}
	gateMu.Lock()
	defaultGate = g
	gateMu.Unlock()
}

// gate returns the currently installed Gate.
func gate() Gate {
	gateMu.RLock()
	defer gateMu.RUnlock()
	return defaultGate
}

// modelKey returns the value used as the gate key for a model: whatever the
// model's context accessor yields from context(). It returns nil when the
// model or its accessor is nil.
//
// The error from context() is deliberately discarded; in that case the key is
// whatever pointer context() returned alongside the error.
// NOTE(review): presumably nil on error; confirm against the accessor.
func modelKey(model *ModelContext) *whisper.Context {
	if model == nil {
		return nil
	}
	if model.ctxAccessor() == nil {
		return nil
	}
	handle, _ := model.ctxAccessor().context()
	return handle
}
21 changes: 19 additions & 2 deletions bindings/go/pkg/whisper/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,21 @@ import (
// ERRORS

var (
ErrUnableToLoadModel = errors.New("unable to load model")
ErrInternalAppError = errors.New("internal application error")
ErrUnableToLoadModel = errors.New("unable to load model")

// ErrInternalAppError is a generic internal failure.
//
// Deprecated: use ErrModelClosed to check whether the model has been closed.
ErrInternalAppError = errors.New("internal application error")

ErrProcessingFailed = errors.New("processing failed")
ErrUnsupportedLanguage = errors.New("unsupported language")
ErrModelNotMultilingual = errors.New("model is not multilingual")
// ErrModelClosed joins a "model has been closed" error with ErrInternalAppError,
// so errors.Is(err, ErrInternalAppError) still matches for existing callers.
ErrModelClosed = errors.Join(errors.New("model has been closed"), ErrInternalAppError)
// ErrStatelessBusy signals that a stateless context is already processing;
// concurrent processing on it is not supported.
ErrStatelessBusy = errors.New("stateless context is busy; concurrent processing not supported")

// Private errors
errParametersRequired = errors.New("parameters are required")
errModelRequired = errors.New("model is required")
errUnableToCreateState = errors.New("unable to create state")
)

///////////////////////////////////////////////////////////////////////////////
Expand All @@ -26,3 +36,10 @@ const SampleRate = whisper.SampleRate

// SampleBits is the number of bits per sample.
const SampleBits = whisper.SampleBits

// SamplingStrategy identifies a sampling strategy. Its values are the
// low-level bindings' sampling constants converted to this type.
type SamplingStrategy uint32

// Sampling strategies, converted from the low-level bindings' constants.
const (
	SAMPLING_GREEDY      = SamplingStrategy(whisper.SAMPLING_GREEDY)
	SAMPLING_BEAM_SEARCH = SamplingStrategy(whisper.SAMPLING_BEAM_SEARCH)
)
Loading