Skip to content

Commit 6ddda10

Browse files
author
ochafik
committed
Merge remote-tracking branch 'origin/master' into tool-diffs
2 parents 224101b + 3e0be1c commit 6ddda10

File tree

21 files changed

+336
-77
lines changed

21 files changed

+336
-77
lines changed

.github/actions/windows-setup-curl/action.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ inputs:
55
description: 'CURL version'
66
required: false
77
default: '8.6.0_6'
8+
architecture:
9+
description: 'Architecture of the libcurl to download'
10+
required: false
11+
default: 'win64'
812
outputs:
913
curl_path:
1014
description: "Path to the downloaded libcurl"
@@ -18,8 +22,9 @@ runs:
1822
shell: powershell
1923
env:
2024
CURL_VERSION: ${{ inputs.curl_version }}
25+
ARCHITECTURE: ${{ inputs.architecture }}
2126
run: |
22-
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
27+
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip"
2328
mkdir $env:RUNNER_TEMP/libcurl
2429
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
2530
echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT

.github/workflows/build-linux-cross.yml

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,94 @@ jobs:
140140
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
141141
142142
cmake --build build --config Release -j $(nproc)
143+
144+
ubuntu-24-ppc64el-cpu-cross:
145+
runs-on: ubuntu-24.04
146+
147+
steps:
148+
- uses: actions/checkout@v4
149+
- name: Setup PowerPC64le
150+
run: |
151+
sudo dpkg --add-architecture ppc64el
152+
153+
# Add arch-specific repositories for non-amd64 architectures
154+
cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
155+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
156+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
157+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
158+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
159+
EOF
160+
161+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
162+
163+
sudo apt-get install -y --no-install-recommends \
164+
build-essential \
165+
gcc-14-powerpc64le-linux-gnu \
166+
g++-14-powerpc64le-linux-gnu \
167+
libcurl4-openssl-dev:ppc64el
168+
169+
- name: Build
170+
run: |
171+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
172+
-DGGML_OPENMP=OFF \
173+
-DLLAMA_BUILD_EXAMPLES=ON \
174+
-DLLAMA_BUILD_TOOLS=ON \
175+
-DLLAMA_BUILD_TESTS=OFF \
176+
-DCMAKE_SYSTEM_NAME=Linux \
177+
-DCMAKE_SYSTEM_PROCESSOR=ppc64 \
178+
-DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
179+
-DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
180+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
181+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
182+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
183+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
184+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
185+
186+
cmake --build build --config Release -j $(nproc)
187+
188+
ubuntu-24-ppc64el-vulkan-cross:
189+
runs-on: ubuntu-24.04
190+
191+
steps:
192+
- uses: actions/checkout@v4
193+
- name: Setup PowerPC64le
194+
run: |
195+
sudo dpkg --add-architecture ppc64el
196+
197+
# Add arch-specific repositories for non-amd64 architectures
198+
cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
199+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
200+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
201+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
202+
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
203+
EOF
204+
205+
sudo apt-get update || true ;# Prevent failure due to missing URLs.
206+
207+
sudo apt-get install -y --no-install-recommends \
208+
build-essential \
209+
glslc \
210+
gcc-14-powerpc64le-linux-gnu \
211+
g++-14-powerpc64le-linux-gnu \
212+
libvulkan-dev:ppc64el \
213+
libcurl4-openssl-dev:ppc64el
214+
215+
- name: Build
216+
run: |
217+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
218+
-DGGML_VULKAN=ON \
219+
-DGGML_OPENMP=OFF \
220+
-DLLAMA_BUILD_EXAMPLES=ON \
221+
-DLLAMA_BUILD_TOOLS=ON \
222+
-DLLAMA_BUILD_TESTS=OFF \
223+
-DCMAKE_SYSTEM_NAME=Linux \
224+
-DCMAKE_SYSTEM_PROCESSOR=ppc64 \
225+
-DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
226+
-DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
227+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
228+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
229+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
230+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
231+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
232+
233+
cmake --build build --config Release -j $(nproc)

.github/workflows/release.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,14 +238,19 @@ jobs:
238238
matrix:
239239
include:
240240
- build: 'cpu-x64'
241+
arch: 'x64'
241242
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
242243
#- build: 'openblas-x64'
244+
# arch: 'x64'
243245
# defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
244246
- build: 'vulkan-x64'
247+
arch: 'x64'
245248
defines: '-DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
246249
- build: 'cpu-arm64'
250+
arch: 'arm64'
247251
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF'
248252
- build: 'opencl-adreno-arm64'
253+
arch: 'arm64'
249254
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
250255

251256
steps:
@@ -312,6 +317,8 @@ jobs:
312317
- name: libCURL
313318
id: get_libcurl
314319
uses: ./.github/actions/windows-setup-curl
320+
with:
321+
architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
315322

316323
- name: Build
317324
id: cmake_build
@@ -339,7 +346,7 @@ jobs:
339346
env:
340347
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
341348
run: |
342-
Copy-Item $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll
349+
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
343350
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
344351
345352
- name: Upload artifacts

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,4 +572,11 @@ automatically. For example:
572572
$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
573573
```
574574
575-
## References
575+
## Dependencies
576+
577+
- [yhirose/cpp-httplib](https://github.com/yhirose/cpp-httplib) - Single-header HTTP server, used by `llama-server` - MIT License
578+
- [stb-image](https://github.com/nothings/stb) - Single-header image format decoder, used by multimodal subsystem - Public domain
579+
- [nlohmann/json](https://github.com/nlohmann/json) - Single-header JSON library, used by various tools/examples - MIT License
580+
- [minja](https://github.com/google/minja) - Minimal Jinja parser in C++, used by various tools/examples - MIT License
581+
- [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License
582+
- [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)

common/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ if (LLAMA_LLGUIDANCE)
125125

126126
ExternalProject_Add(llguidance_ext
127127
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
128-
# v0.7.19 (+ fancy-regex build fix):
129-
GIT_TAG b59f98f85269892a7de3d3641ad155366f13daa6
128+
# v0.7.20 (+ fix to build on GCC 15):
129+
GIT_TAG b5b8b64dba11c4e4ee6b1d1450d3a3ae279891e8
130130
PREFIX ${CMAKE_BINARY_DIR}/llguidance
131131
SOURCE_DIR ${LLGUIDANCE_SRC}
132132
BUILD_IN_SOURCE TRUE

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,13 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
415415
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK192_HV128,
416416
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_H256,
417417
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK576_HV512,
418+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H64,
419+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H64,
420+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H64,
421+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_1_H64,
422+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_0_H64,
423+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_1_H64,
424+
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q8_0_H64,
418425
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H96,
419426
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H96,
420427
GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H96,
@@ -1362,6 +1369,13 @@ @implementation GGMLMetalClass
13621369
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK192_HV128, flash_attn_ext_q8_0_hk192_hv128, has_simdgroup_mm);
13631370
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_H256, flash_attn_ext_q8_0_h256, has_simdgroup_mm);
13641371
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_Q8_0_HK576_HV512, flash_attn_ext_q8_0_hk576_hv512, has_simdgroup_mm);
1372+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H64, flash_attn_ext_vec_f16_h64, has_simdgroup_reduction);
1373+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H64, flash_attn_ext_vec_bf16_h64, has_simdgroup_reduction && use_bfloat);
1374+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H64, flash_attn_ext_vec_q4_0_h64, has_simdgroup_reduction);
1375+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_1_H64, flash_attn_ext_vec_q4_1_h64, has_simdgroup_reduction);
1376+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_0_H64, flash_attn_ext_vec_q5_0_h64, has_simdgroup_reduction);
1377+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_1_H64, flash_attn_ext_vec_q5_1_h64, has_simdgroup_reduction);
1378+
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q8_0_H64, flash_attn_ext_vec_q8_0_h64, has_simdgroup_reduction);
13651379
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H96, flash_attn_ext_vec_f16_h96, has_simdgroup_reduction);
13661380
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H96, flash_attn_ext_vec_bf16_h96, has_simdgroup_reduction && use_bfloat);
13671381
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H96, flash_attn_ext_vec_q4_0_h96, has_simdgroup_reduction);
@@ -4358,7 +4372,7 @@ static bool ggml_metal_encode_node(
43584372
// TODO: add vec kernels for (ne00%64 == 0) and maybe also for (ne00%32 == 0)
43594373
// for now avoiding mainly to keep the number of templates/kernels a bit lower
43604374
// these are now trivial to add after: https://github.com/ggml-org/llama.cpp/pull/12612
4361-
if (ne01 >= 20 || (ne00%128 != 0 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
4375+
if (ne01 >= 20 || (ne00%128 != 0 && ne00 != 64 && ne00 != 96 && ne00 != 192 && ne00 != 576)) {
43624376
switch (src1->type) {
43634377
case GGML_TYPE_F16:
43644378
{
@@ -4539,6 +4553,24 @@ static bool ggml_metal_encode_node(
45394553
use_vec_kernel = true;
45404554

45414555
switch (ne00) {
4556+
case 64:
4557+
{
4558+
switch (src1->type) {
4559+
case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_F16_H64].pipeline; break;
4560+
case GGML_TYPE_BF16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_BF16_H64].pipeline; break;
4561+
case GGML_TYPE_Q4_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_0_H64].pipeline; break;
4562+
case GGML_TYPE_Q4_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q4_1_H64].pipeline; break;
4563+
case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_0_H64].pipeline; break;
4564+
case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q5_1_H64].pipeline; break;
4565+
case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_FLASH_ATTN_EXT_VEC_Q8_0_H64].pipeline; break;
4566+
default:
4567+
{
4568+
GGML_LOG_ERROR("unsupported type: %d\n", src1->type);
4569+
GGML_LOG_ERROR("add template specialization for this type\n");
4570+
GGML_ABORT("add template specialization for this type");
4571+
}
4572+
}
4573+
} break;
45424574
case 96:
45434575
{
45444576
switch (src1->type) {

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4124,6 +4124,16 @@ kernel void kernel_flash_attn_ext_vec(
41244124

41254125
typedef decltype(kernel_flash_attn_ext_vec<FA_TYPES, half4, 1, dequantize_f16_t4, half4, 1, dequantize_f16_t4, 128, 128, 4>) flash_attn_ext_vec_t;
41264126

4127+
template [[host_name("kernel_flash_attn_ext_vec_f16_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, half4, 1, dequantize_f16_t4, half4, 1, dequantize_f16_t4, 64, 64, 8>;
4128+
#if defined(GGML_METAL_USE_BF16)
4129+
template [[host_name("kernel_flash_attn_ext_vec_bf16_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, bfloat4, 1, dequantize_bf16_t4, bfloat4, 1, dequantize_bf16_t4, 64, 64, 8>;
4130+
#endif
4131+
template [[host_name("kernel_flash_attn_ext_vec_q4_0_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, block_q4_0, 8, dequantize_q4_0_t4, block_q4_0, 8, dequantize_q4_0_t4, 64, 64, 8>;
4132+
template [[host_name("kernel_flash_attn_ext_vec_q4_1_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, block_q4_1, 8, dequantize_q4_1_t4, block_q4_1, 8, dequantize_q4_1_t4, 64, 64, 8>;
4133+
template [[host_name("kernel_flash_attn_ext_vec_q5_0_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, block_q5_0, 8, dequantize_q5_0_t4, block_q5_0, 8, dequantize_q5_0_t4, 64, 64, 8>;
4134+
template [[host_name("kernel_flash_attn_ext_vec_q5_1_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, block_q5_1, 8, dequantize_q5_1_t4, block_q5_1, 8, dequantize_q5_1_t4, 64, 64, 8>;
4135+
template [[host_name("kernel_flash_attn_ext_vec_q8_0_h64")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, block_q8_0, 8, dequantize_q8_0_t4, block_q8_0, 8, dequantize_q8_0_t4, 64, 64, 8>;
4136+
41274137
template [[host_name("kernel_flash_attn_ext_vec_f16_h96")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, half4, 1, dequantize_f16_t4, half4, 1, dequantize_f16_t4, 96, 96, 4>;
41284138
#if defined(GGML_METAL_USE_BF16)
41294139
template [[host_name("kernel_flash_attn_ext_vec_bf16_h96")]] kernel flash_attn_ext_vec_t kernel_flash_attn_ext_vec<FA_TYPES, bfloat4, 1, dequantize_bf16_t4, bfloat4, 1, dequantize_bf16_t4, 96, 96, 4>;

0 commit comments

Comments
 (0)