Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 26 additions & 263 deletions README.md

Large diffs are not rendered by default.

49 changes: 13 additions & 36 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,52 +97,29 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})

if (EMSCRIPTEN)
add_subdirectory(whisper.wasm)
set_target_properties(libmain PROPERTIES FOLDER "libs")
add_subdirectory(stream.wasm)
set_target_properties(libstream PROPERTIES FOLDER "libs")
add_subdirectory(command.wasm)
set_target_properties(libcommand PROPERTIES FOLDER "libs")
#add_subdirectory(talk.wasm)
#set_target_properties(libtalk PROPERTIES FOLDER "libs")
add_subdirectory(bench.wasm)
set_target_properties(libbench PROPERTIES FOLDER "libs")
elseif(CMAKE_JS_VERSION)
add_subdirectory(addon.node)
set_target_properties(addon.node PROPERTIES FOLDER "examples")
else()
add_subdirectory(main)
set_target_properties(main PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
add_subdirectory(stream)
set_target_properties(stream PROPERTIES FOLDER "examples")
endif (WHISPER_SDL2)
add_subdirectory(server)
set_target_properties(server PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
add_subdirectory(command)
set_target_properties(command PROPERTIES FOLDER "examples")
endif (WHISPER_SDL2)
add_subdirectory(cli)
add_subdirectory(bench)
set_target_properties(bench PROPERTIES FOLDER "examples")
add_subdirectory(server)
add_subdirectory(quantize)
set_target_properties(quantize PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
# TODO: disabled until update
# https://github.com/ggerganov/whisper.cpp/issues/1818
#add_subdirectory(talk)
#set_target_properties(talk PROPERTIES FOLDER "examples")
add_subdirectory(talk-llama)
set_target_properties(talk-llama PROPERTIES FOLDER "examples")
add_subdirectory(lsp)
set_target_properties(lsp PROPERTIES FOLDER "examples")
if (GGML_SYCL)
add_subdirectory(sycl)
set_target_properties(ls-sycl-device PROPERTIES FOLDER "examples")
endif()
endif (WHISPER_SDL2)
if (WHISPER_SDL2)
add_subdirectory(stream)
add_subdirectory(command)
add_subdirectory(talk-llama)
add_subdirectory(lsp)
if (GGML_SYCL)
add_subdirectory(sycl)
endif()
endif (WHISPER_SDL2)

add_subdirectory(deprecation-warning)
endif()

if (WHISPER_SDL2)
add_subdirectory(wchess)
set_target_properties(wchess PROPERTIES FOLDER "examples")
endif (WHISPER_SDL2)
4 changes: 3 additions & 1 deletion examples/bench/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
set(TARGET bench)
set(TARGET whisper-bench)
add_executable(${TARGET} bench.cpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})

install(TARGETS ${TARGET} RUNTIME)
9 changes: 3 additions & 6 deletions examples/bench/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# bench
# whisper.cpp/examples/bench

A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of
the transformer on some random audio data and records the execution time. This way we can have an objective comparison
Expand All @@ -7,11 +7,8 @@ of the performance of the model for various setups.
Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89

```bash
# build the bench tool
$ make bench

# run it on the small.en model using 4 threads
$ ./bench -m ./models/ggml-small.en.bin -t 4
# run the bench tool on the small.en model using 4 threads
$ ./build/bin/whisper-bench -m ./models/ggml-small.en.bin -t 4

whisper_model_load: loading model from './models/ggml-small.en.bin'
whisper_model_load: n_vocab = 51864
Expand Down
6 changes: 4 additions & 2 deletions examples/main/CMakeLists.txt → examples/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
set(TARGET main)
add_executable(${TARGET} main.cpp)
set(TARGET whisper-cli)
add_executable(${TARGET} cli.cpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE common whisper ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})

install(TARGETS ${TARGET} RUNTIME)
18 changes: 14 additions & 4 deletions examples/main/README.md → examples/cli/README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# main
# whisper.cpp/examples/cli

This is the main example demonstrating most of the functionality of the Whisper model.
It can be used as a reference for using the `whisper.cpp` library in other projects.

```
./main -h
./build/bin/whisper-cli -h

usage: ./main [options] file0.wav file1.wav ...
usage: ./build/bin/whisper-cli [options] file0.wav file1.wav ...

options:
-h, --help [default] show this help message and exit
Expand All @@ -20,9 +20,12 @@ options:
-sow, --split-on-word [false ] split on word rather than on token
-bo N, --best-of N [5 ] number of best candidates to keep
-bs N, --beam-size N [5 ] beam size for beam search
-ac N, --audio-ctx N [0 ] audio context size (0 - all)
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
-tp, --temperature N [0.00 ] The sampling temperature, between 0 and 1
-tpi, --temperature-inc N [0.20 ] The increment of temperature, between 0 and 1
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
-tr, --translate [false ] translate from source language to english
-di, --diarize [false ] stereo audio diarization
Expand All @@ -38,16 +41,23 @@ options:
-oj, --output-json [false ] output result in a JSON file
-ojf, --output-json-full [false ] include more information in the JSON file
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
-np, --no-prints [false ] do not print anything other than the results
-ps, --print-special [false ] print special tokens
-pc, --print-colors [false ] print colors
-pp, --print-progress [false ] print progress
-nt, --no-timestamps [false ] do not print timestamps
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
-dl, --detect-language [false ] exit after automatically detecting language
--prompt PROMPT [ ] initial prompt
--prompt PROMPT [ ] initial prompt (max n_text_ctx/2 tokens)
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
-f FNAME, --file FNAME [ ] input WAV file path
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
-dtw MODEL --dtw MODEL [ ] compute token-level timestamps
-ls, --log-score [false ] log best decoder scores of tokens
-ng, --no-gpu [false ] disable GPU
-fa, --flash-attn [false ] flash attention
--suppress-regex REGEX [ ] regular expression matching tokens to suppress
--grammar GRAMMAR [ ] GBNF grammar to guide decoding
--grammar-rule RULE [ ] top-level GBNF grammar rule name
--grammar-penalty N [100.0 ] scales down logits of nongrammar tokens
```
File renamed without changes.
5 changes: 3 additions & 2 deletions examples/command/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
if (WHISPER_SDL2)
# command
set(TARGET command)
set(TARGET whisper-command)
add_executable(${TARGET} command.cpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${CMAKE_THREAD_LIBS_INIT})

install(TARGETS ${TARGET} RUNTIME)
endif ()
15 changes: 8 additions & 7 deletions examples/command/README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# command
# whisper.cpp/examples/command

This is a basic Voice Assistant example that accepts voice commands from the microphone.
More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/issues/171).

```bash
# Run with default arguments and small model
./command -m ./models/ggml-small.en.bin -t 8
./whisper-command -m ./models/ggml-small.en.bin -t 8

# On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
./command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
./whisper-command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
```

https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
Expand All @@ -23,18 +23,18 @@ Initial tests show that this approach might be extremely efficient in terms of p

```bash
# Run in guided mode, the list of allowed commands is in commands.txt
./command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt
./whisper-command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt

# On Raspberry Pi, in guided mode you can use "-ac 128" for extra performance
./command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
./whisper-command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
```

https://user-images.githubusercontent.com/1991296/207435352-8fc4ed3f-bde5-4555-9b8b-aeeb76bee969.mp4


## Building

The `command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
The `whisper-command` tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:

```bash
# Install SDL2
Expand All @@ -47,5 +47,6 @@ sudo dnf install SDL2 SDL2-devel
# Install SDL2 on Mac OS
brew install sdl2

make command
cmake -B build -DWHISPER_SDL2=ON
cmake --build build --config Release
```
4 changes: 4 additions & 0 deletions examples/deprecation-warning/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build the same deprecation-warning source once per legacy binary name.
# Each resulting executable is compiled from ./deprecation-warning.cpp and
# differs only in its target (and therefore output file) name.
foreach(legacy_binary main bench stream command)
    add_executable(${legacy_binary} ./deprecation-warning.cpp)
endforeach()
17 changes: 17 additions & 0 deletions examples/deprecation-warning/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Migration notice for binary filenames

> [!IMPORTANT]
> [2024 Dec 20] Binaries have been renamed with a `whisper-` prefix. `main` is now `whisper-cli`, `server` is `whisper-server`, etc. (https://github.com/ggerganov/whisper.cpp/pull/2648)

This migration was important, but it is a breaking change that may not always be immediately obvious to users.

Please update all scripts and workflows to use the new binary names.

| Old Filename | New Filename |
| ---- | ---- |
| main | whisper-cli |
| bench | whisper-bench |
| stream | whisper-stream |
| command | whisper-command |
| server | whisper-server |
| talk-llama | whisper-talk-llama |
34 changes: 34 additions & 0 deletions examples/deprecation-warning/deprecation-warning.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Warns users that this filename was deprecated, and provides a link for more information.

#include <cstdio>
#include <cstdlib>
#include <string>

// Entry point of the deprecation stub.
//
// Derives the invoked binary name from argv[0], prints a warning to stdout
// that names the deprecated binary and its "whisper-"-prefixed replacement,
// and always returns EXIT_FAILURE.
//
// argc, argv: standard program arguments; only argv[0] is read.
int main(int argc, char** argv) {
    // Fall back to "main" when the program name is not available
    std::string filename = "main";
    if (argc >= 1 && argv[0] != nullptr) {
        filename = argv[0];
    }

    // Get only the program name from the full path
    const size_t pos = filename.find_last_of("/\\");
    if (pos != std::string::npos) {
        filename = filename.substr(pos+1);
    }

    // Drop a trailing ".exe" so that an invocation such as "main.exe" is still
    // recognized as "main" below instead of being mapped to "whisper-main.exe"
    const std::string exe_suffix = ".exe";
    if (filename.size() > exe_suffix.size() &&
        filename.compare(filename.size() - exe_suffix.size(), exe_suffix.size(), exe_suffix) == 0) {
        filename.erase(filename.size() - exe_suffix.size());
    }

    // Prepend "whisper-" to the filename to get the replacement filename
    std::string replacement_filename = "whisper-" + filename;

    // The exception is if the filename is "main", then our replacement filename is "whisper-cli"
    if (filename == "main") {
        replacement_filename = "whisper-cli";
    }

    fprintf(stdout, "\n");
    fprintf(stdout, "WARNING: The binary '%s' is deprecated.\n", filename.c_str());
    fprintf(stdout, " Please use '%s' instead.\n", replacement_filename.c_str());
    fprintf(stdout, " See https://github.com/ggerganov/whisper.cpp/tree/master/examples/deprecation-warning/README.md for more information.\n");
    fprintf(stdout, "\n");

    // Always report failure: this stub never performs the work of the old binary
    return EXIT_FAILURE;
}
4 changes: 2 additions & 2 deletions examples/generate-karaoke.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# Press Ctrl+C to stop recording
#

executable="./main"
executable="./build/bin/whisper-cli"
model="base.en"
model_path="models/ggml-$model.bin"

Expand Down Expand Up @@ -46,7 +46,7 @@ ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>

# run Whisper
echo "Processing ..."
./main -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1
${executable} -m models/ggml-base.en.bin rec16.wav -owts > /dev/null 2>&1

# generate Karaoke video
echo "Generating video ..."
Expand Down
6 changes: 3 additions & 3 deletions examples/livestream.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ model="base.en"

check_requirements()
{
if ! command -v ./main &>/dev/null; then
if ! command -v ./build/bin/whisper-cli &>/dev/null; then
echo "whisper.cpp main executable is required (make)"
exit 1
fi
Expand Down Expand Up @@ -100,7 +100,7 @@ while [ $running -eq 1 ]; do
err=$(cat /tmp/whisper-live.err | wc -l)
done

./main -t 8 -m ./models/ggml-${model}.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
./build/bin/whisper-cli -t 8 -m ./models/ggml-${model}.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1

while [ $SECONDS -lt $((($i+1)*$step_s)) ]; do
sleep 1
Expand All @@ -109,4 +109,4 @@ while [ $running -eq 1 ]; do
done

killall -v ffmpeg
killall -v main
killall -v whisper-cli
4 changes: 3 additions & 1 deletion examples/server/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(TARGET server)
set(TARGET whisper-server)
add_executable(${TARGET} server.cpp httplib.h)

include(DefaultTargetOptions)
Expand All @@ -8,3 +8,5 @@ target_link_libraries(${TARGET} PRIVATE common json_cpp whisper ${CMAKE_THREAD_L
if (WIN32)
target_link_libraries(${TARGET} PRIVATE ws2_32)
endif()

install(TARGETS ${TARGET} RUNTIME)
6 changes: 3 additions & 3 deletions examples/server/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# whisper.cpp http server
# whisper.cpp/examples/server

Simple http server. WAV Files are passed to the inference model via http requests.

Expand All @@ -7,9 +7,9 @@ https://github.com/ggerganov/whisper.cpp/assets/1991296/e983ee53-8741-4eb5-9048-
## Usage

```
./server -h
./build/bin/whisper-server -h

usage: ./bin/server [options]
usage: ./build/bin/whisper-server [options]

options:
-h, --help [default] show this help message and exit
Expand Down
4 changes: 2 additions & 2 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,8 @@ int main(int argc, char ** argv) {
if (sparams.ffmpeg_converter) {
// if file is not wav, convert to wav
// write to temporary file
const std::string temp_filename_base = std::tmpnam(nullptr);
//const std::string temp_filename_base = std::tmpnam(nullptr);
const std::string temp_filename_base = "whisper-server-tmp"; // TODO: this is a hack, remove when the mutext is removed
const std::string temp_filename = temp_filename_base + ".wav";
std::ofstream temp_file{temp_filename, std::ios::binary};
temp_file << audio_file.content;
Expand Down Expand Up @@ -711,7 +712,6 @@ int main(int argc, char ** argv) {
}
}


printf("Successfully loaded %s\n", filename.c_str());

// print system information
Expand Down
5 changes: 3 additions & 2 deletions examples/stream/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
if (WHISPER_SDL2)
# stream
set(TARGET stream)
set(TARGET whisper-stream)
add_executable(${TARGET} stream.cpp)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${CMAKE_THREAD_LIBS_INIT})

install(TARGETS ${TARGET} RUNTIME)
endif ()
Loading
Loading