Commit 7155163

committed: cont
1 parent 481ad39 commit 7155163

File tree: 12 files changed (+44 / -39 lines changed)

README.md

Lines changed: 5 additions & 8 deletions
@@ -53,8 +53,6 @@ On Apple Silicon, the inference runs fully on the GPU via Metal:
 
 https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
 
-Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
-
 ## Quick start
 
 First clone the repository:
@@ -546,7 +544,7 @@ https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8
 ## Benchmarks
 
 In order to have an objective comparison of the performance of the inference across different system configurations,
-use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
+use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
 took to execute it. The results are summarized in the following Github issue:
 
 [Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
@@ -609,12 +607,11 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 
 | Example | Web | Description |
 | --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
-| [cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
-| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
-| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
-| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
+| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
+| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
+| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
+| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
 | [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
-| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
 | [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
 | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
 | [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |

examples/bench/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
@@ -1,6 +1,8 @@
-set(TARGET bench)
+set(TARGET whisper-bench)
 add_executable(${TARGET} bench.cpp)
 
 include(DefaultTargetOptions)
 
 target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
+
+install(TARGETS ${TARGET} RUNTIME)

examples/bench/README.md

Lines changed: 3 additions & 6 deletions
@@ -1,4 +1,4 @@
-# bench
+# whisper.cpp/examples/bench
 
 A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of
 the transformer on some random audio data and records the execution time. This way we can have an objective comparison
@@ -7,11 +7,8 @@ of the performance of the model for various setups.
 Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89
 
 ```bash
-# build the bench tool
-$ make bench
-
-# run it on the small.en model using 4 threads
-$ ./bench -m ./models/ggml-small.en.bin -t 4
+# run the bench tool on the small.en model using 4 threads
+$ ./build/bin/whisper-bench -m ./models/ggml-small.en.bin -t 4
 
 whisper_model_load: loading model from './models/ggml-small.en.bin'
 whisper_model_load: n_vocab = 51864
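The sample output above is a series of `key = value` log lines. As a minimal sketch (not part of the commit), a field such as `n_vocab` can be pulled out of such output with `awk`; the log lines below are copied from the README, while the extraction one-liner is purely illustrative.

```shell
# Extract a field (here n_vocab) from whisper-bench style log output.
# The sample lines come from the README above; the awk helper is
# illustrative, not code from the repository.
log="whisper_model_load: loading model from './models/ggml-small.en.bin'
whisper_model_load: n_vocab = 51864"

n_vocab=$(printf '%s\n' "$log" | awk -F' = ' '/n_vocab/ { print $2 }')
echo "n_vocab is $n_vocab"
```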

examples/command/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
@@ -1,9 +1,10 @@
 if (WHISPER_SDL2)
-    # command
-    set(TARGET command)
+    set(TARGET whisper-command)
     add_executable(${TARGET} command.cpp)
 
     include(DefaultTargetOptions)
 
     target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${CMAKE_THREAD_LIBS_INIT})
+
+    install(TARGETS ${TARGET} RUNTIME)
 endif ()

examples/command/README.md

Lines changed: 8 additions & 7 deletions
@@ -1,14 +1,14 @@
-# command
+# whisper.cpp/examples/command
 
 This is a basic Voice Assistant example that accepts voice commands from the microphone.
 More info is available in [issue #171](https://github.com/ggerganov/whisper.cpp/issues/171).
 
 ```bash
 # Run with default arguments and small model
-./command -m ./models/ggml-small.en.bin -t 8
+./whisper-command -m ./models/ggml-small.en.bin -t 8
 
 # On Raspberry Pi, use tiny or base models + "-ac 768" for better performance
-./command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
+./whisper-command -m ./models/ggml-tiny.en.bin -ac 768 -t 3 -c 0
 ```
 
 https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
@@ -23,18 +23,18 @@ Initial tests show that this approach might be extremely efficient in terms of p
 
 ```bash
 # Run in guided mode, the list of allowed commands is in commands.txt
-./command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt
+./whisper-command -m ./models/ggml-base.en.bin -cmd ./examples/command/commands.txt
 
 # On Raspberry Pi, in guided mode you can use "-ac 128" for extra performance
-./command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
+./whisper-command -m ./models/ggml-tiny.en.bin -cmd ./examples/command/commands.txt -ac 128 -t 3 -c 0
 ```
 
 https://user-images.githubusercontent.com/1991296/207435352-8fc4ed3f-bde5-4555-9b8b-aeeb76bee969.mp4
 
 
 ## Building
 
-The `command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
+The `whisper-command` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
 
 ```bash
 # Install SDL2
@@ -47,5 +47,6 @@ sudo dnf install SDL2 SDL2-devel
 # Install SDL2 on Mac OS
 brew install sdl2
 
-make command
+cmake -B build -DWHISPER_SDL2=ON
+cmake --build build --config Release
 ```
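Guided mode above takes an allow-list of commands via `-cmd`. As a self-contained sketch (the one-command-per-line format is an assumption, not shown in this diff), the file can be generated and sanity-checked like this:

```shell
# Hypothetical sketch of a guided-mode command file. Assumption: one
# allowed command per line, as suggested by the -cmd usage above.
commands_file=$(mktemp)
printf '%s\n' "turn on the lights" "turn off the lights" "play music" > "$commands_file"

# whisper-command would then be pointed at it via: -cmd "$commands_file"
n_commands=$(wc -l < "$commands_file")
echo "loaded $n_commands commands"
```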
Lines changed: 6 additions & 0 deletions
@@ -1,2 +1,8 @@
 add_executable(main ./deprecation-warning.cpp)
 target_compile_features(main PRIVATE cxx_std_11)
+
+add_executable(bench ./deprecation-warning.cpp)
+target_compile_features(bench PRIVATE cxx_std_11)
+
+add_executable(stream ./deprecation-warning.cpp)
+target_compile_features(stream PRIVATE cxx_std_11)
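The hunk above keeps stub executables under the old names (`main`, `bench`, `stream`), all built from a shared `deprecation-warning.cpp`. A rough shell analogue of what such a stub does is sketched below; the old-to-new mapping follows the renames elsewhere in this commit, but the warning wording is an assumption, since the source of `deprecation-warning.cpp` is not shown in this diff.

```shell
# Map a deprecated example name to its whisper-prefixed replacement and
# print a warning. Mapping grounded in this commit's renames; message
# text assumed.
deprecation_warning() {
    case $1 in
        main)   new=whisper-cli ;;
        bench)  new=whisper-bench ;;
        stream) new=whisper-stream ;;
        *)      new=$1 ;;
    esac
    echo "WARNING: '$1' is deprecated, use '$new' instead"
}

deprecation_warning bench
```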

examples/stream/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
@@ -1,9 +1,10 @@
 if (WHISPER_SDL2)
-    # stream
-    set(TARGET stream)
+    set(TARGET whisper-stream)
     add_executable(${TARGET} stream.cpp)
 
     include(DefaultTargetOptions)
 
     target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${CMAKE_THREAD_LIBS_INIT})
+
+    install(TARGETS ${TARGET} RUNTIME)
 endif ()

examples/stream/README.md

Lines changed: 6 additions & 6 deletions
@@ -1,11 +1,11 @@
-# stream
+# whisper.cpp/examples/stream
 
 This is a naive example of performing real-time inference on audio from your microphone.
-The `stream` tool samples the audio every half a second and runs the transcription continuously.
+The `whisper-stream` tool samples the audio every half a second and runs the transcription continuously.
 More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
 
 ```bash
-./build/bin/stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
+./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
 ```
 
 https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
@@ -15,7 +15,7 @@ https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a
 Setting the `--step` argument to `0` enables the sliding window mode:
 
 ```bash
-./build/bin/stream -m ./models/ggml-base.en.bin -t 6 --step 0 --length 30000 -vth 0.6
+./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 6 --step 0 --length 30000 -vth 0.6
 ```
 
 In this mode, the tool will transcribe only after some speech activity is detected. A very
@@ -27,7 +27,7 @@ a transcription block that is suitable for parsing.
 
 ## Building
 
-The `stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
+The `whisper-stream` tool depends on SDL2 library to capture audio from the microphone. You can build it like this:
 
 ```bash
 # Install SDL2
@@ -43,7 +43,7 @@ brew install sdl2
 cmake -B build -DWHISPER_SDL2=ON
 cmake --build build --config Release
 
-./build/bin/stream
+./build/bin/whisper-stream
 ```
 
 ## Web version
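The `--step` / `--length` pair in the README above defines how the streaming window advances. The arithmetic below is purely illustrative (not repository code): with `--step 500` the transcription re-runs every 500 ms over a 5000 ms window, i.e. each window spans 10 steps.

```shell
# Illustrative arithmetic for the default stream invocation shown above:
# length / step = number of steps covered by one transcription window.
step_ms=500
length_ms=5000
steps_per_window=$(( length_ms / step_ms ))
echo "$steps_per_window steps per window"
```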

scripts/bench-all.sh

Lines changed: 3 additions & 3 deletions
@@ -38,13 +38,13 @@ if [ "$encoder_only" -eq 0 ]; then
     printf "Running memcpy benchmark\n"
     printf "\n"
 
-    ./build/bin/bench -w 1 -t $n_threads 2>&1
+    ./build/bin/whisper-bench -w 1 -t $n_threads 2>&1
 
     printf "\n"
     printf "Running ggml_mul_mat benchmark with $n_threads threads\n"
     printf "\n"
 
-    ./build/bin/bench -w 2 -t $n_threads 2>&1
+    ./build/bin/whisper-bench -w 2 -t $n_threads 2>&1
 
     printf "\n"
     printf "Running benchmark for all models\n"
@@ -64,7 +64,7 @@ printf "| %6s | %6s | %16s | %13s | %3s | %3s | %7s | %7s | %7s | %7s | %7s |\n"
 for model in "${models[@]}"; do
     # actual run
     # store stderr output in a variable in order to parse it later
-    output=$(./build/bin/bench -m ./models/ggml-$model.bin -t $n_threads $fattn 2>&1)
+    output=$(./build/bin/whisper-bench -m ./models/ggml-$model.bin -t $n_threads $fattn 2>&1)
     ret=$?
 
     # parse the output:
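The `output=$(... 2>&1)` line above merges the benchmark's stderr into stdout, captures it, and saves the exit status before anything can overwrite it. A self-contained sketch of that pattern, with a stand-in function in place of `./build/bin/whisper-bench` (the sample log line mimics whisper.cpp's timing output but is made up here):

```shell
# fake_bench stands in for ./build/bin/whisper-bench, which logs to stderr.
fake_bench() {
    echo "whisper_print_timings: total time = 1234.00 ms" >&2
    return 0
}

# Merge stderr into stdout, capture it, and read $? immediately.
output=$(fake_bench 2>&1)
ret=$?
echo "ret=$ret"
echo "$output"
```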

scripts/bench-wts.sh

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ echo "Input file duration: ${DURATION}s"
 
 for model in $models; do
     echo "Running $model"
-    COMMAND="./main -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
+    COMMAND="./build/bin/whisper-cli -m models/ggml-$model.bin -owts -f $1 -of $1.$model"
 
     if [ ! -z "$2" ]; then
         COMMAND="$COMMAND -fp $2"
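The script above builds the command as a string and appends `-fp $2` only when a font argument was supplied. A self-contained sketch of that optional-argument pattern (`build_command` and the file paths are illustrative, not from the repository):

```shell
# Build the base whisper-cli command, appending "-fp <font>" only when a
# second argument is given. Paths and model name are illustrative.
build_command() {
    cmd="./build/bin/whisper-cli -m models/ggml-base.bin -owts -f $1"
    if [ ! -z "$2" ]; then
        cmd="$cmd -fp $2"
    fi
    printf '%s\n' "$cmd"
}

build_command samples/jfk.wav
build_command samples/jfk.wav fonts/Courier.ttf
```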
