Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions bindings/ruby/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
LICENSE
pkg/
lib/whisper.*
ext/sources/*
!ext/sources/CMakeGraphVizOptions.cmake
ext/mkmf.log
10 changes: 4 additions & 6 deletions bindings/ruby/ext/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@ Makefile
whisper.so
whisper.bundle
whisper.dll
scripts/get-flags.mk
*.o
/*/**/*.c
/*/**/*.cpp
/*/**/*.h
/*/**/*.m
/*/**/*.metal
*.a
sources/*
!sources/CMakeGraphVizOptions.cmake
mkmf.log
36 changes: 24 additions & 12 deletions bindings/ruby/ext/options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,39 @@ def cmake_options
Dir.chdir __dir__ do
output = `#{@cmake.shellescape} -S sources -B build -L`
end
started = false
@cmake_options = output.lines.filter_map {|line|
if line.chomp == "-- Cache values"
started = true
next
end
next unless started
option, value = line.chomp.split("=", 2)
name, type = option.split(":", 2)
[name, type, value]
}
@cmake_options = output.lines.drop_while {|line| line.chomp != "-- Cache values"}.drop(1)
.filter_map {|line|
option, value = line.chomp.split("=", 2)
name, type = option.split(":", 2)
[
name,
[
type,
type == "BOOL" ? value == "ON" : value
]
]
}.to_h
end

private

def configure
cmake_options.each do |name, type, default_value|
cmake_options.each_pair do |name, (type, default_value)|
option = option_name(name)
value = type == "BOOL" ? enable_config(option) : arg_config("--#{option}")
@options[name] = [type, value]
end

configure_coreml
end

def configure_coreml
use_coreml = if @options["WHISPER_COREML"][1].nil?
cmake_options["WHISPER_COREML"][1]
else
@options["WHISPER_COREML"][1]
end
$CPPFLAGS << " -DRUBY_WHISPER_USE_COREML" if use_coreml
end

def option_name(name)
Expand Down
13 changes: 13 additions & 0 deletions bindings/ruby/ext/ruby_whisper.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ ID id_new;
ID id_to_path;
ID id_URI;
ID id_pre_converted_models;
ID id_coreml_compiled_models;
ID id_cache;

static bool is_log_callback_finalized = false;

Expand Down Expand Up @@ -83,6 +85,14 @@ static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
return rb_str_new2(str_full);
}

/*
* call-seq:
* system_info_str -> String
*/
static VALUE ruby_whisper_s_system_info_str(VALUE self) {
return rb_str_new2(whisper_print_system_info());
}

static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
is_log_callback_finalized = true;
return Qnil;
Expand Down Expand Up @@ -130,6 +140,8 @@ void Init_whisper() {
id_to_path = rb_intern("to_path");
id_URI = rb_intern("URI");
id_pre_converted_models = rb_intern("pre_converted_models");
id_coreml_compiled_models = rb_intern("coreml_compiled_models");
id_cache = rb_intern("cache");

mWhisper = rb_define_module("Whisper");
mVAD = rb_define_module_under(mWhisper, "VAD");
Expand All @@ -145,6 +157,7 @@ void Init_whisper() {
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
rb_define_singleton_method(mWhisper, "system_info_str", ruby_whisper_s_system_info_str, 0);
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);

Expand Down
48 changes: 37 additions & 11 deletions bindings/ruby/ext/ruby_whisper_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ extern ID id_new;
extern ID id_to_path;
extern ID id_URI;
extern ID id_pre_converted_models;
extern ID id_coreml_compiled_models;
extern ID id_cache;

extern VALUE cContext;
extern VALUE eError;
extern VALUE cModel;

extern const rb_data_type_t ruby_whisper_params_type;
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
extern VALUE rb_whisper_model_initialize(VALUE context);
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
extern VALUE rb_whisper_model_s_new(VALUE context);
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
extern void prepare_transcription(ruby_whisper_params *rwp, VALUE *context);

static void
Expand Down Expand Up @@ -53,6 +55,9 @@ ruby_whisper_memsize(const void *p)
if (!rw) {
return 0;
}
if (rw->context) {
size += sizeof(rw->context);
}
return size;
}

Expand All @@ -79,6 +84,13 @@ ruby_whisper_normalize_model_path(VALUE model_path)
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, model_path);
if (!NIL_P(pre_converted_model)) {
model_path = pre_converted_model;
#ifdef RUBY_WHISPER_USE_COREML
VALUE coreml_converted_models = rb_funcall(cModel, id_coreml_compiled_models, 0);
VALUE coreml_converted_model = rb_hash_aref(coreml_converted_models, pre_converted_model);
if (!NIL_P(coreml_converted_model)) {
rb_funcall(coreml_converted_model, id_cache, 0);
}
#endif
}
else if (TYPE(model_path) == T_STRING) {
const char * model_path_str = StringValueCStr(model_path);
Expand Down Expand Up @@ -293,13 +305,20 @@ VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
// Should check when samples.respond_to?(:length)?
} else {
if (TYPE(samples) == T_ARRAY) {
n_samples = RARRAY_LEN(samples);
if (RARRAY_LEN(samples) > INT_MAX) {
rb_raise(rb_eArgError, "samples are too long");
}
n_samples = (int)RARRAY_LEN(samples);
} else if (memory_view_available_p) {
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
view.obj = Qnil;
rb_raise(rb_eArgError, "unable to get a memory view");
}
n_samples = view.byte_size / view.item_size;
ssize_t n_samples_size = view.byte_size / view.item_size;
if (n_samples_size > INT_MAX) {
rb_raise(rb_eArgError, "samples are too long");
}
n_samples = (int)n_samples_size;
} else if (rb_respond_to(samples, id_length)) {
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
} else {
Expand Down Expand Up @@ -387,10 +406,17 @@ ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
view.obj = Qnil;
rb_raise(rb_eArgError, "unable to get a memory view");
}
n_samples = view.byte_size / view.item_size;
ssize_t n_samples_size = view.byte_size / view.item_size;
if (n_samples_size > INT_MAX) {
rb_raise(rb_eArgError, "samples are too long");
}
n_samples = (int)n_samples_size;
} else {
if (TYPE(samples) == T_ARRAY) {
n_samples = RARRAY_LEN(samples);
if (RARRAY_LEN(samples) > INT_MAX) {
rb_raise(rb_eArgError, "samples are too long");
}
n_samples = (int)RARRAY_LEN(samples);
} else if (rb_respond_to(samples, id_length)) {
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
} else {
Expand Down Expand Up @@ -476,7 +502,7 @@ ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
return INT2NUM(t0);
return LONG2NUM(t0);
}

/*
Expand All @@ -494,7 +520,7 @@ ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
TypedData_Get_Struct(self, ruby_whisper, &ruby_whisper_type, rw);
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
return INT2NUM(t1);
return LONG2NUM(t1);
}

/*
Expand Down Expand Up @@ -552,7 +578,7 @@ ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
static VALUE
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
{
return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
return rb_whisper_segment_s_new(self, NUM2INT(i_segment));
}

/*
Expand Down Expand Up @@ -586,7 +612,7 @@ ruby_whisper_each_segment(VALUE self)

const int n_segments = whisper_full_n_segments(rw->context);
for (int i = 0; i < n_segments; ++i) {
rb_yield(rb_whisper_segment_initialize(self, i));
rb_yield(rb_whisper_segment_s_new(self, i));
}

return self;
Expand All @@ -599,7 +625,7 @@ ruby_whisper_each_segment(VALUE self)
static VALUE
ruby_whisper_get_model(VALUE self)
{
return rb_whisper_model_initialize(self);
return rb_whisper_model_s_new(self);
}

void
Expand Down
2 changes: 1 addition & 1 deletion bindings/ruby/ext/ruby_whisper_model.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static VALUE ruby_whisper_model_allocate(VALUE klass) {
return TypedData_Make_Struct(klass, ruby_whisper_model, &rb_whisper_model_type, rwm);
}

VALUE rb_whisper_model_initialize(VALUE context) {
VALUE rb_whisper_model_s_new(VALUE context) {
ruby_whisper_model *rwm;
const VALUE model = ruby_whisper_model_allocate(cModel);
TypedData_Get_Struct(model, ruby_whisper_model, &rb_whisper_model_type, rwm);
Expand Down
4 changes: 2 additions & 2 deletions bindings/ruby/ext/ruby_whisper_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ extern VALUE cVADParams;
extern ID id_call;

extern VALUE ruby_whisper_normalize_model_path(VALUE model_path);
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
extern VALUE rb_whisper_segment_s_new(VALUE context, int index);
extern const rb_data_type_t ruby_whisper_vad_params_type;

static ID param_names[RUBY_WHISPER_PARAMS_PARAM_NAMES_COUNT];
Expand Down Expand Up @@ -110,7 +110,7 @@ static void new_segment_callback(struct whisper_context *ctx, struct whisper_sta
const int n_segments = whisper_full_n_segments_from_state(state);
for (int i = n_new; i > 0; i--) {
int i_segment = n_segments - i;
VALUE segment = rb_whisper_segment_initialize(*container->context, i_segment);
VALUE segment = rb_whisper_segment_s_new(*container->context, i_segment);
for (int j = 0; j < callbacks_len; j++) {
VALUE cb = rb_ary_entry(container->callbacks, j);
rb_funcall(cb, id_call, 1, segment);
Expand Down
6 changes: 3 additions & 3 deletions bindings/ruby/ext/ruby_whisper_segment.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ ruby_whisper_segment_allocate(VALUE klass)
}

VALUE
rb_whisper_segment_initialize(VALUE context, int index)
rb_whisper_segment_s_new(VALUE context, int index)
{
ruby_whisper_segment *rws;
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
Expand All @@ -63,7 +63,7 @@ ruby_whisper_segment_get_start_time(VALUE self)
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
return INT2NUM(t0 * 10);
return LONG2NUM(t0 * 10);
}

/*
Expand All @@ -81,7 +81,7 @@ ruby_whisper_segment_get_end_time(VALUE self)
TypedData_Get_Struct(rws->context, ruby_whisper, &ruby_whisper_type, rw);
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
return INT2NUM(t1 * 10);
return LONG2NUM(t1 * 10);
}

/*
Expand Down
57 changes: 56 additions & 1 deletion bindings/ruby/lib/whisper/model/uri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,44 @@ def format_bytesize(bytesize)
end
end

class ZipURI < URI
def cache
zip_path = Pathname(super)
dest = unzipped_path
return if dest.exist? && dest.mtime >= zip_path.mtime
escaping dest do
system "unzip", "-q", "-d", zip_path.dirname.to_path, zip_path.to_path, exception: true
end
zip_path.to_path
end

def clear_cache
super
unzipped_path.rmtree if unzipped_path.exist?
end

private

def unzipped_path
cache_path.sub_ext("")
end

def escaping(path)
escaped = Pathname("#{path}.removing")
if path.exist?
escaped.rmtree if escaped.exist?
path.rename escaped
end
yield
ensure
if path.exist?
escaped.rmtree if escaped.exist?
else
escaped.rename path if escaped.exist?
end
end
end

@pre_converted_models = %w[
tiny
tiny.en
Expand Down Expand Up @@ -171,8 +209,25 @@ def format_bytesize(bytesize)
@pre_converted_models[name] = URI.new("https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-#{name}.bin")
end

@coreml_compiled_models = %w[
tiny
tiny.en
base
base.en
small
small.en
medium
medium.en
large-v1
large-v2
large-v3
large-v3-turbo
].each_with_object({}) do |name, models|
models[@pre_converted_models[name]] = ZipURI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}-encoder.mlmodelc.zip")
end

class << self
attr_reader :pre_converted_models
attr_reader :pre_converted_models, :coreml_compiled_models
end
end
end
Loading
Loading