Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
06f3267
Add VAD models
KitaitiMakoto May 23, 2025
86aac13
Extract function to normalize model path from ruby_whisper_initialize()
KitaitiMakoto May 23, 2025
eb3b1bc
Define ruby_whisper_vad_params struct
KitaitiMakoto May 25, 2025
2f96162
Add VAD-related features to Whisper::Params
KitaitiMakoto May 25, 2025
cf8465c
Add tests for VAD-related features
KitaitiMakoto May 25, 2025
944b2ac
Define Whisper::VADParams
KitaitiMakoto May 25, 2025
46f9626
Add Whisper::VAD::Params attributes
KitaitiMakoto May 26, 2025
2fb49e1
Add test suite for VAD::Params
KitaitiMakoto May 26, 2025
e3e25a8
Make older test to follow namespace change
KitaitiMakoto May 26, 2025
800b347
Add test for transcription with VAD
KitaitiMakoto May 26, 2025
55f4d2e
Add assertion for test_vad_params
KitaitiMakoto May 26, 2025
5a372d1
Add signatures for VAD-related methods
KitaitiMakoto May 26, 2025
8b5a8f8
Define VAD::Params#==
KitaitiMakoto May 26, 2025
7400ba0
Add test for VAD::Params#==
KitaitiMakoto May 26, 2025
a12d759
Fix Params#vad_params
KitaitiMakoto May 26, 2025
1981c30
Add test for Params#vad_params
KitaitiMakoto May 26, 2025
60a6ed1
Fix signature of Params#vad_params
KitaitiMakoto May 26, 2025
793a23a
Use macro to define VAD::Params params
KitaitiMakoto May 26, 2025
04f72d5
Define VAD::Params#initialize
KitaitiMakoto May 26, 2025
3436330
Add tests for VAD::Params#initialize
KitaitiMakoto May 26, 2025
fe75b22
Add signature for VAD::Params.new
KitaitiMakoto May 26, 2025
0d72f75
Add documentation on VAD in README
KitaitiMakoto May 26, 2025
14bec64
Wrap register_callbask in prepare_transcription for clear meanings
KitaitiMakoto May 26, 2025
0ec33e1
Set whisper_params.vad_params just before transcription
KitaitiMakoto May 26, 2025
aaa293b
Don't touch NULL
KitaitiMakoto May 27, 2025
684a680
Define ruby_whisper_params_type
KitaitiMakoto May 27, 2025
64b8b6f
Use TypedData_XXX for ruby_whisper_params instead of Data_XXX
KitaitiMakoto May 27, 2025
c2e9453
Remove unused functions
KitaitiMakoto May 27, 2025
b2bf7f8
Define rb_whisper_model_data_type
KitaitiMakoto May 27, 2025
d81efe4
Use TypedData_XXX for ruby_whisper_model instead of Data_XXX
KitaitiMakoto May 27, 2025
181b8c9
Define ruby_whisper_segment_type
KitaitiMakoto May 27, 2025
e317d37
Use TypedData_XXX for ruby_whisper_segment instead of Data_XXX
KitaitiMakoto May 27, 2025
ebed9c3
Define ruby_whisper_type
KitaitiMakoto May 27, 2025
dc6d93e
Use TypedData_XXX for ruby_whisper instead of Data_XXX
KitaitiMakoto May 27, 2025
aa52840
Qualify with const
KitaitiMakoto May 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions bindings/ruby/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,41 @@ See [models][] page for details.

Currently, whisper.cpp accepts only 16-bit WAV files.

### Voice Activity Detection (VAD) ###

Support for Voice Activity Detection (VAD) can be enabled by setting `Whisper::Params`'s `vad` argument to `true` and specifying a VAD model:

```ruby
Whisper::Params.new(
vad: true,
vad_model_path: "silero-v5.1.2",
# other arguments...
)
```

When you pass the model name (`"silero-v5.1.2"`) or URI (`https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin`), it will be downloaded automatically.
Currently, "silero-v5.1.2" is registered as a pre-converted model, like the ASR models. You can also specify the file path or URI of a model.

If you need to configure VAD behavior, pass params for that:

```ruby
Whisper::Params.new(
vad: true,
vad_model_path: "silero-v5.1.2",
vad_params: Whisper::VAD::Params.new(
threshold: 1.0, # defaults to 0.5
min_speech_duration_ms: 500, # defaults to 250
min_silence_duration_ms: 200, # defaults to 100
max_speech_duration_s: 30000, # defaults to FLT_MAX
speech_pad_ms: 50, # defaults to 30
samples_overlap: 0.5 # defaults to 0.1
),
# other arguments...
)
```

For details on VAD, see [whisper.cpp's README](https://github.com/ggml-org/whisper.cpp?tab=readme-ov-file#voice-activity-detection-vad).

API
---

Expand Down
15 changes: 5 additions & 10 deletions bindings/ruby/ext/ruby_whisper.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
#include "ruby_whisper.h"

VALUE mWhisper;
VALUE mVAD;
VALUE cContext;
VALUE cParams;
VALUE cVADParams;
VALUE eError;

VALUE cSegment;
Expand All @@ -31,6 +33,7 @@ extern void init_ruby_whisper_params(VALUE *mWhisper);
extern void init_ruby_whisper_error(VALUE *mWhisper);
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
extern void init_ruby_whisper_model(VALUE *mWhisper);
extern void init_ruby_whisper_vad_params(VALUE *mVAD);
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);

/*
Expand Down Expand Up @@ -116,16 +119,6 @@ static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_d
return Qnil;
}

/*
 * GC mark callback for the wrapped ruby_whisper_model struct.
 * Reports the `context` VALUE stored in the struct to Ruby's garbage
 * collector through rb_gc_mark().
 */
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
  const VALUE held_context = rwm->context;

  rb_gc_mark(held_context);
}

/*
 * Allocation function for the model wrapper class.
 *
 * Allocates a ruby_whisper_model and wraps it in a Ruby object of `klass`,
 * registering rb_whisper_model_mark as the GC mark callback and
 * RUBY_DEFAULT_FREE as the free function.
 *
 * Returns the newly wrapped Ruby object.
 */
static VALUE ruby_whisper_model_allocate(VALUE klass) {
  ruby_whisper_model *rwm = ALLOC(ruby_whisper_model);

  /*
   * ALLOC() hands back uninitialized memory. The mark callback passes
   * rwm->context to rb_gc_mark(), so it must hold a valid VALUE before the
   * object becomes visible to the GC; otherwise a collection that runs
   * before the initializer assigns `context` would mark a garbage pointer.
   */
  rwm->context = Qnil;

  return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
}

void Init_whisper() {
id_to_s = rb_intern("to_s");
id_call = rb_intern("call");
Expand All @@ -139,6 +132,7 @@ void Init_whisper() {
id_pre_converted_models = rb_intern("pre_converted_models");

mWhisper = rb_define_module("Whisper");
mVAD = rb_define_module_under(mWhisper, "VAD");

rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
Expand All @@ -159,6 +153,7 @@ void Init_whisper() {
init_ruby_whisper_error(&mWhisper);
init_ruby_whisper_segment(&mWhisper, &cContext);
init_ruby_whisper_model(&mWhisper);
init_ruby_whisper_vad_params(&mVAD);

rb_require("whisper/model/uri");
}
5 changes: 5 additions & 0 deletions bindings/ruby/ext/ruby_whisper.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@ typedef struct {
ruby_whisper_callback_container *progress_callback_container;
ruby_whisper_callback_container *encoder_begin_callback_container;
ruby_whisper_callback_container *abort_callback_container;
VALUE vad_params;
} ruby_whisper_params;

/*
 * Wrapper struct holding a whisper_vad_params value.
 * NOTE(review): presumably the native data behind Whisper::VAD::Params;
 * confirm against the file that defines that class.
 */
typedef struct {
struct whisper_vad_params params; /* stored by value, not as a pointer */
} ruby_whisper_vad_params;

typedef struct {
VALUE context;
int index;
Expand Down
Loading
Loading