Skip to content

Commit a4ae235

Browse files
committed
Merge remote-tracking branch 'ggerganov/master'
* ggerganov/master: (49 commits) cli : add --suppress_nst support (ggml-org#2664) cli : add no_speech_thold (ggml-org#2663) cmake : remove hardcoded install rpath server : fix help print ruby : bug fix on callbacks and no_speech_prob (ggml-org#2656) server : add no-speech threshold parameter and functionality (ggml-org#2654) whisper : rename suppress_non_speech_tokens to suppress_nst (ggml-org#2653) server : add option to suppress non-speech tokens (ggml-org#2649) whisper : rename binaries + fix install (ggml-org#2648) ruby : update gem version to v1.3.1 release : v1.7.3 ci : msys enable SDL2 build (ggml-org#2635) ruby : sync ggml (ggml-org#2643) android : try to fix build files : remove old sources sync : ggml talk-llama : sync llama.cpp sync : ggml ggml : update ggml_backend_cpu_device_supports_op (llama/10867) vulkan: bugfixes for small subgroup size systems + llvmpipe test (llama/10809) ...
2 parents 493e336 + 7d55637 commit a4ae235

File tree

161 files changed

+6262
-7106
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

161 files changed

+6262
-7106
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ jobs:
283283
- name: Build using CMake
284284
shell: msys2 {0}
285285
run: |
286-
cmake -B build
286+
cmake -B build -DWHISPER_SDL2=ON
287287
cmake --build build --config ${{ matrix.build }} -j $(nproc)
288288
289289
- name: Clean after building using CMake

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
22
project("whisper.cpp" C CXX)
3-
project("whisper.cpp" VERSION 1.7.2)
3+
project("whisper.cpp" VERSION 1.7.3)
44
include(CheckIncludeFileCXX)
55

66
set(SOVERSION 1)

README.md

Lines changed: 27 additions & 264 deletions
Large diffs are not rendered by default.

bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,11 +181,11 @@ public void suppressBlanks(boolean enable) {
181181
}
182182

183183
/** Flag to suppress non-speech tokens. */
184-
public CBool suppress_non_speech_tokens;
184+
public CBool suppress_nst;
185185

186186
/** Flag to suppress non-speech tokens. */
187187
public void suppressNonSpeechTokens(boolean enable) {
188-
suppress_non_speech_tokens = enable ? CBool.TRUE : CBool.FALSE;
188+
suppress_nst = enable ? CBool.TRUE : CBool.FALSE;
189189
}
190190

191191
/** Initial decoding temperature. */
@@ -315,7 +315,7 @@ protected List<String> getFieldOrder() {
315315
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
316316
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
317317
"tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
318-
"suppress_blank", "suppress_non_speech_tokens", "temperature", "max_initial_ts", "length_penalty",
318+
"suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
319319
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
320320
"new_segment_callback", "new_segment_callback_user_data",
321321
"progress_callback", "progress_callback_user_data",

bindings/javascript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "whisper.cpp",
3-
"version": "1.7.2",
3+
"version": "1.7.3",
44
"description": "Whisper speech recognition",
55
"main": "whisper.js",
66
"scripts": {

bindings/ruby/README.md

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Usage
2222
```ruby
2323
require "whisper"
2424

25-
whisper = Whisper::Context.new(Whisper::Model["base"])
25+
whisper = Whisper::Context.new("base")
2626

2727
params = Whisper::Params.new
2828
params.language = "en"
@@ -44,20 +44,26 @@ end
4444
Some models are prepared up-front:
4545

4646
```ruby
47-
base_en = Whisper::Model["base.en"]
47+
base_en = Whisper::Model.pre_converted_models["base.en"]
4848
whisper = Whisper::Context.new(base_en)
4949
```
5050

5151
The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
5252

5353
```ruby
54-
Whisper::Model["base"].clear_cache
54+
Whisper::Model.pre_converted_models["base"].clear_cache
5555
```
5656

57-
You can see the list of prepared model names by `Whisper::Model.preconverted_model_names`:
57+
You can also use a shorthand for pre-converted models:
5858

5959
```ruby
60-
puts Whisper::Model.preconverted_model_names
60+
whisper = Whisper::Context.new("base.en")
61+
```
62+
63+
You can see the list of prepared model names by `Whisper::Model.pre_converted_models.keys`:
64+
65+
```ruby
66+
puts Whisper::Model.pre_converted_models.keys
6167
# tiny
6268
# tiny.en
6369
# tiny-q5_1
@@ -124,13 +130,6 @@ end
124130
You can also add a hook to params that is called on new segments:
125131

126132
```ruby
127-
def format_time(time_ms)
128-
sec, decimal_part = time_ms.divmod(1000)
129-
min, sec = sec.divmod(60)
130-
hour, min = min.divmod(60)
131-
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
132-
end
133-
134133
# Add hook before calling #transcribe
135134
params.on_new_segment do |segment|
136135
line = "[%{st} --> %{ed}] %{text}" % {
@@ -151,7 +150,7 @@ whisper.transcribe("path/to/audio.wav", params)
151150
You can see model information:
152151

153152
```ruby
154-
whisper = Whisper::Context.new(Whisper::Model["base"])
153+
whisper = Whisper::Context.new("base")
155154
model = whisper.model
156155

157156
model.n_vocab # => 51864
@@ -200,7 +199,7 @@ Using this feature, you are also able to suppress log:
200199
Whisper.log_set ->(level, buffer, user_data) {
201200
# do nothing
202201
}, nil
203-
Whisper::Context.new(MODEL)
202+
Whisper::Context.new("base")
204203
```
205204

206205
### Low-level API to transcribe ###
@@ -214,14 +213,14 @@ require "wavefile"
214213
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
215214
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
216215

217-
whisper = Whisper::Context.new(Whisper::Model["base"])
216+
whisper = Whisper::Context.new("base")
218217
whisper.full(Whisper::Params.new, samples)
219218
whisper.each_segment do |segment|
220219
puts segment.text
221220
end
222221
```
223222

224-
The second argument `samples` may be an array, an object with `length` method, or a MemoryView. If you can prepare audio data as C array and export it as a MemoryView, whispercpp accepts and works with it with zero copy.
223+
The second argument `samples` may be an array, an object with `length` and `each` methods, or a MemoryView. If you can prepare audio data as a C array and export it as a MemoryView, whispercpp accepts it and works with it with zero copy.
225224

226225
License
227226
-------

bindings/ruby/Rakefile

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ task build: ["ext/Makefile", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp", "whis
2525
directory "pkg"
2626
CLOBBER.include "pkg"
2727

28-
TEST_MODEL = "../../models/ggml-base.en.bin"
2928
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
3029
SO_FILE = File.join("ext", LIB_NAME)
3130
LIB_FILE = File.join("lib", LIB_NAME)
@@ -41,23 +40,17 @@ file SO_FILE => "ext/Makefile" do |t|
4140
sh "make"
4241
end
4342
end
44-
CLEAN.include LIB_FILE
43+
CLEAN.include SO_FILE
4544

4645
directory "lib"
4746
file LIB_FILE => [SO_FILE, "lib"] do |t|
4847
copy t.source, t.name
4948
end
49+
CLEAN.include LIB_FILE
5050

5151
Rake::TestTask.new do |t|
5252
t.test_files = FileList["tests/test_*.rb"]
5353
end
54-
task test: [TEST_MODEL, LIB_FILE]
55-
56-
file TEST_MODEL do
57-
Dir.chdir "../.." do
58-
sh "./models/download-ggml-model.sh base.en"
59-
end
60-
end
6154

6255
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
6356
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
@@ -67,4 +60,5 @@ file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
6760
end
6861
end
6962
CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
70-
task test: TEST_MEMORY_VIEW
63+
64+
task test: [LIB_FILE, TEST_MEMORY_VIEW]

bindings/ruby/ext/extconf.rb

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,6 @@
111111
$MK_CFLAGS << ' -march=native -mtune=native'
112112
$HOST_CXXFLAGS << ' -march=native -mtune=native'
113113
end
114-
115-
if $UNAME_M.match? /aarch64.*/
116-
$MK_CFLAGS << ' -mcpu=native'
117-
$MK_CXXFLAGS << ' -mcpu=native'
118-
end
119114
else
120115
$MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
121116
$MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
@@ -162,7 +157,6 @@
162157

163158
$OBJ_GGML <<
164159
'ggml/src/ggml.o' <<
165-
'ggml/src/ggml-aarch64.o' <<
166160
'ggml/src/ggml-alloc.o' <<
167161
'ggml/src/ggml-backend.o' <<
168162
'ggml/src/ggml-backend-reg.o' <<
@@ -172,7 +166,9 @@
172166
'ggml/src/ggml-cpu/ggml-cpu.o' <<
173167
'ggml/src/ggml-cpu/ggml-cpu-cpp.o' <<
174168
'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
175-
'ggml/src/ggml-cpu/ggml-cpu-quants.o'
169+
'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
170+
'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
171+
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
176172

177173
$OBJ_WHISPER <<
178174
'src/whisper.o'

0 commit comments

Comments
 (0)