Skip to content

Commit d13e8ba

Browse files
authored
Merge branch 'ggerganov:master' into embed_yolo_files
2 parents 8d6feac + 7bc1d8e commit d13e8ba

File tree

185 files changed

+12687
-23210
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

185 files changed

+12687
-23210
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161

6262
- name: Configure CMake
6363
working-directory: ./build
64-
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
64+
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF ..
6565

6666
- name: Build
6767
working-directory: ./build
@@ -112,7 +112,7 @@ jobs:
112112

113113
- name: Configure CMake
114114
working-directory: ./build
115-
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
115+
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF ..
116116

117117
- name: Build
118118
working-directory: ./build

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
build/
2+
build-blas/
23
build-debug/
34
build-release/
45
build-sanitize-addr/
@@ -30,6 +31,7 @@ tests/arm_neon.h
3031
zig-out/
3132
zig-cache/
3233

34+
*.o
3335
*.dot
3436

3537
*.sw?

CMakeLists.txt

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ endif()
2525

2626
# options
2727

28+
if (APPLE)
29+
set(GGML_METAL_DEFAULT ON)
30+
set(GGML_BLAS_DEFAULT ON)
31+
set(GGML_BLAS_VENDOR_DEFAULT "Apple")
32+
else()
33+
set(GGML_METAL_DEFAULT OFF)
34+
set(GGML_BLAS_DEFAULT OFF)
35+
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
36+
endif()
37+
2838
option(BUILD_SHARED_LIBS "ggml: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
2939

3040
option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
@@ -41,12 +51,13 @@ option(GGML_TEST_COVERAGE "ggml: enable test coverage" OFF)
4151

4252
option(GGML_PERF "ggml: enable perf timings" OFF)
4353
option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF)
44-
option(GGML_OPENBLAS "ggml: use OpenBLAS" OFF)
45-
option(GGML_CLBLAST "ggml: use clBLAST" OFF)
54+
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
55+
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
56+
"ggml: BLAS library vendor")
4657
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
4758
option(GGML_CUDA "ggml: use CUDA" OFF)
4859
option(GGML_CUBLAS "ggml: use CUDA (deprecated)" OFF)
49-
option(GGML_METAL "ggml: use Metal" OFF)
60+
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
5061
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
5162
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
5263
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" OFF)

README.md

Lines changed: 1 addition & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Some of the development is currently happening in the [llama.cpp](https://github
2424

2525
- [X] Example of GPT-2 inference [examples/gpt-2](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2)
2626
- [X] Example of GPT-J inference [examples/gpt-j](https://github.com/ggerganov/ggml/tree/master/examples/gpt-j)
27-
- [X] Example of Whisper inference [examples/whisper](https://github.com/ggerganov/ggml/tree/master/examples/whisper)
27+
- [X] Example of Whisper inference [ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp)
2828
- [X] Example of LLaMA inference [ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp)
2929
- [X] Example of LLaMA training [ggerganov/llama.cpp/examples/baby-llama](https://github.com/ggerganov/llama.cpp/tree/master/examples/baby-llama)
3030
- [X] Example of Falcon inference [cmp-nct/ggllm.cpp](https://github.com/cmp-nct/ggllm.cpp)
@@ -44,20 +44,6 @@ Some of the development is currently happening in the [llama.cpp](https://github
4444
- [X] Example of multiple LLMs inference [foldl/chatllm.cpp](https://github.com/foldl/chatllm.cpp)
4545
- [X] SeamlessM4T inference *(in development)* https://github.com/facebookresearch/seamless_communication/tree/main/ggml
4646

47-
## Whisper inference (example)
48-
49-
With ggml you can efficiently run [Whisper](examples/whisper) inference on the CPU.
50-
51-
Memory requirements:
52-
53-
| Model | Disk | Mem |
54-
| --- | --- | --- |
55-
| tiny | 75 MB | ~280 MB |
56-
| base | 142 MB | ~430 MB |
57-
| small | 466 MB | ~1.0 GB |
58-
| medium | 1.5 GB | ~2.6 GB |
59-
| large | 2.9 GB | ~4.7 GB |
60-
6147
## GPT inference (example)
6248

6349
With ggml you can efficiently run [GPT-2](examples/gpt-2) and [GPT-J](examples/gpt-j) inference on the CPU.
@@ -128,11 +114,6 @@ cmake -DGGML_CUBLAS=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.1/bin/nvcc ..
128114
cmake -DCMAKE_C_COMPILER="$(hipconfig -l)/clang" -DCMAKE_CXX_COMPILER="$(hipconfig -l)/clang++" -DGGML_HIPBLAS=ON
129115
```
130116

131-
## Using clBLAST
132-
133-
```bash
134-
cmake -DGGML_CLBLAST=ON ..
135-
```
136117
## Compiling for Android
137118

138119
Download and unzip the NDK from this download [page](https://developer.android.com/ndk/downloads). Set the NDK_ROOT_PATH environment variable or provide the absolute path to the CMAKE_ANDROID_NDK in the command below.
@@ -170,64 +151,6 @@ export LD_LIBRARY_PATH=/data/local/tmp
170151
./bin/gpt-2-backend -m models/ggml-model.bin -p "this is an example"
171152
```
172153

173-
### CLBlast for Android
174-
175-
Build CLBlast.
176-
177-
```bash
178-
# In CLBlast/build
179-
$ANDROID_SDK_PATH/cmake/3.22.1/bin/cmake .. \
180-
-DCMAKE_SYSTEM_NAME=Android \
181-
-DCMAKE_SYSTEM_VERSION=33 \
182-
-DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \
183-
-DCMAKE_ANDROID_NDK=$ANDROID_NDK_PATH \
184-
-DCMAKE_ANDROID_STL_TYPE=c++_static \
185-
-DOPENCL_ROOT=$(readlink -f ../../OpenCL-Headers) \
186-
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=BOTH \
187-
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
188-
189-
# Build libclblast.so
190-
make -j4
191-
```
192-
193-
Pull `libGLES_mali.so` to `libOpenCL.so`.
194-
195-
```bash
196-
# In ggml project root.
197-
mkdir arm64-v8a
198-
adb pull /system/vendor/lib64/egl/libGLES_mali.so arm64-v8a/libOpenCL.so
199-
```
200-
201-
Build ggml with CLBlast.
202-
203-
```bash
204-
# In ggml/build
205-
cd build
206-
$ANDROID_SDK_PATH/cmake/3.22.1/bin/cmake .. \
207-
-DGGML_CLBLAST=ON \
208-
-DCMAKE_SYSTEM_NAME=Android \
209-
-DCMAKE_SYSTEM_VERSION=33 \
210-
-DCMAKE_ANDROID_ARCH_ABI=arm64-v8a \
211-
-DCMAKE_ANDROID_NDK=$ANDROID_NDK_PATH \
212-
-DCMAKE_ANDROID_STL_TYPE=c++_shared \
213-
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH \
214-
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=BOTH \
215-
-DCLBLAST_HOME=$(readlink -f ../../CLBlast) \
216-
-DOPENCL_LIB=$(readlink -f ../arm64-v8a/libOpenCL.so)
217-
218-
# Run make, adb push, etc.
219-
```
220-
221-
Then in `adb shell`...
222-
223-
```bash
224-
cd /data/local/tmp
225-
export LD_LIBRARY_PATH=/system/vendor/lib64/egl:/data/local/tmp
226-
./bin/gpt-2-backend -m models/ggml-model.bin -n 64 -p "Pepperoni pizza"
227-
```
228-
229-
OpenCL does not have the same level of support in `ggml-backend` as CUDA or Metal. In the `gpt-2-backend` example, OpenCL will only be used for the matrix multiplications when evaluating large prompts.
230-
231154
## Resources
232155

233156
- [GGML - Large Language Models for Everyone](https://github.com/rustformers/llm/blob/main/crates/ggml/README.md): a description of the GGML format provided by the maintainers of the `llm` Rust crate, which provides Rust bindings for GGML

ci/run.sh

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -218,39 +218,6 @@ function gg_sum_mnist {
218218
gg_printf '```\n'
219219
}
220220

221-
# whisper
222-
223-
function gg_run_whisper {
224-
cd ${SRC}
225-
226-
gg_wget models-mnt/whisper/ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin
227-
gg_wget models-mnt/whisper/ https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav
228-
229-
cd build-ci-release
230-
231-
set -e
232-
233-
path_models="../models-mnt/whisper/"
234-
model_f16="${path_models}/ggml-base.en.bin"
235-
audio_0="${path_models}/jfk.wav"
236-
237-
(time ./bin/whisper -m ${model_f16} -f ${audio_0} ) 2>&1 | tee -a $OUT/${ci}-main.log
238-
239-
grep -q "And so my fellow Americans" $OUT/${ci}-main.log
240-
241-
set +e
242-
}
243-
244-
function gg_sum_whisper {
245-
gg_printf '### %s\n\n' "${ci}"
246-
247-
gg_printf 'Runs short Whisper transcription\n'
248-
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
249-
gg_printf '```\n'
250-
gg_printf '%s\n' "$(cat $OUT/${ci}-main.log)"
251-
gg_printf '```\n'
252-
}
253-
254221
# sam
255222

256223
function gg_run_sam {
@@ -344,11 +311,12 @@ if [ ! -z ${GG_BUILD_METAL} ]; then
344311
export GGML_METAL_PATH_RESOURCES="${SRC}/build-ci-release/bin"
345312
fi
346313

347-
test $ret -eq 0 && gg_run gpt_2
348-
test $ret -eq 0 && gg_run mnist
349-
test $ret -eq 0 && gg_run whisper
350-
test $ret -eq 0 && gg_run sam
351-
test $ret -eq 0 && gg_run yolo
314+
if [ -z ${GG_BUILD_NO_DOWNLOAD} ]; then
315+
test $ret -eq 0 && gg_run gpt_2
316+
test $ret -eq 0 && gg_run mnist
317+
test $ret -eq 0 && gg_run sam
318+
test $ret -eq 0 && gg_run yolo
319+
fi
352320

353321
if [ -z $GG_BUILD_LOW_PERF ]; then
354322
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 16 ]; then

examples/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ target_include_directories(common-ggml PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
2020

2121
add_subdirectory(gpt-2)
2222
add_subdirectory(gpt-j)
23-
add_subdirectory(whisper)
2423
add_subdirectory(mnist)
2524
add_subdirectory(sam)
2625
add_subdirectory(yolo)

examples/common.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
#include <io.h>
2525
#endif
2626

27+
#ifdef WHISPER_FFMPEG
28+
// as implemented in ffmpeg_transcode.cpp; only embedded in the common lib if whisper is built with ffmpeg support
29+
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
30+
#endif
31+
2732
// Function to check if the next argument exists
2833
std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
2934
if (i + 1 < argc && argv[i + 1][0] != '-') {
@@ -637,7 +642,7 @@ bool is_wav_buffer(const std::string buf) {
637642

638643
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
639644
drwav wav;
640-
std::vector<uint8_t> wav_data; // used for pipe input from stdin
645+
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
641646

642647
if (fname == "-") {
643648
{
@@ -670,8 +675,19 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
670675
}
671676
}
672677
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
678+
#if defined(WHISPER_FFMPEG)
679+
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
680+
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
681+
return false;
682+
}
683+
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
684+
fprintf(stderr, "error: failed to read wav data as wav \n");
685+
return false;
686+
}
687+
#else
673688
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
674689
return false;
690+
#endif
675691
}
676692

677693
if (wav.channels != 1 && wav.channels != 2) {

examples/common.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ struct gpt_params {
2121
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
2222
int32_t n_predict = 200; // new tokens to predict
2323
int32_t n_parallel = 1; // number of parallel streams
24-
int32_t n_batch = 8; // batch size for prompt processing
24+
int32_t n_batch = 32; // batch size for prompt processing
2525
int32_t n_ctx = 2048; // context size (this is the KV cache max size)
2626
int32_t n_gpu_layers = 0; // number of layers to offload to the GPU
2727

@@ -185,7 +185,7 @@ class wav_writer {
185185
// It is assumed that PCM data is normalized to a range from -1 to 1
186186
bool write_audio(const float * data, size_t length) {
187187
for (size_t i = 0; i < length; ++i) {
188-
const int16_t intSample = data[i] * 32767;
188+
const int16_t intSample = int16_t(data[i] * 32767);
189189
file.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
190190
dataSize += sizeof(int16_t);
191191
}

examples/gpt-2/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,6 @@ if (GGML_CUBLAS)
3939
add_compile_definitions(GGML_USE_CUBLAS)
4040
endif()
4141

42-
if (GGML_CLBLAST)
43-
add_compile_definitions(GGML_USE_CLBLAST)
44-
endif()
45-
4642
if (GGML_METAL)
4743
add_compile_definitions(GGML_USE_METAL)
4844
endif()

examples/gpt-2/main-sched.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#include "ggml-metal.h"
1111
#endif
1212

13+
#ifdef GGML_USE_BLAS
14+
#include "ggml-blas.h"
15+
#endif
16+
1317
#include "common.h"
1418
#include "common-ggml.h"
1519

@@ -131,6 +135,16 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
131135
model.backends.push_back(gpu_backend);
132136
}
133137

138+
#ifdef GGML_USE_BLAS
139+
ggml_backend_t blas_backend = ggml_backend_blas_init();
140+
if (!blas_backend) {
141+
fprintf(stderr, "%s: failed to initialize BLAS backend\n", __func__);
142+
} else {
143+
ggml_backend_blas_set_n_threads(blas_backend, params.n_threads);
144+
model.backends.push_back(blas_backend);
145+
}
146+
#endif
147+
134148
// always add the CPU backend as a fallback
135149
ggml_backend_t cpu_backend = ggml_backend_cpu_init();
136150
ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads);

0 commit comments

Comments
 (0)