diff --git a/recipe/0001-Change-gpuAddress-for-contents.patch b/recipe/0001-Change-gpuAddress-for-contents.patch deleted file mode 100644 index 27e4b16..0000000 --- a/recipe/0001-Change-gpuAddress-for-contents.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 228800186cab957319d187ecf6e92052ea2a3d22 Mon Sep 17 00:00:00 2001 -From: Julien Jerphanion -Date: Tue, 16 Sep 2025 11:28:30 +0200 -Subject: [PATCH] Change `gpuAddress` for `contents` - -Signed-off-by: Julien Jerphanion ---- - ggml/src/ggml-metal/ggml-metal-device.m | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m -index e38e70768..e0faebb66 100644 ---- a/ggml/src/ggml-metal/ggml-metal-device.m -+++ b/ggml/src/ggml-metal/ggml-metal-device.m -@@ -989,7 +989,7 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size, - } else { - res->buffers[0].metal = [res->device newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate]; - -- res->all_data = (void *) (res->buffers[0].metal.gpuAddress); -+ res->all_data = (void *) (res->buffers[0].metal.contents); - } - } - --- -2.51.0 - diff --git a/recipe/16576.patch b/recipe/16576.patch new file mode 100644 index 0000000..6fcd9f4 --- /dev/null +++ b/recipe/16576.patch @@ -0,0 +1,180 @@ +From a8d57d66096a65019e2354ff9efe23688794f72e Mon Sep 17 00:00:00 2001 +From: Georgi Gerganov +Date: Tue, 14 Oct 2025 14:11:18 +0300 +Subject: [PATCH 1/2] metal : avoid using Metal's gpuAddress property + +--- + ggml/src/ggml-metal/ggml-metal-device.m | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m +index c3fe8f4e91002..553cf8f5f39ac 100644 +--- a/ggml/src/ggml-metal/ggml-metal-device.m ++++ b/ggml/src/ggml-metal/ggml-metal-device.m +@@ -7,6 +7,8 @@ + + #include <Metal/Metal.h> + ++#include <stdatomic.h> ++ + #ifndef TARGET_OS_VISION + #define
TARGET_OS_VISION 0 + #endif +@@ -22,6 +24,9 @@ + // overload of MTLGPUFamilyMetal3 (not available in some environments) + static const NSInteger MTLGPUFamilyMetal3_GGML = 5001; + ++// virtual address for GPU memory allocations ++static atomic_uintptr_t g_addr_device = 0x000000400ULL; ++ + #if !GGML_METAL_EMBED_LIBRARY + // Here to assist with NSBundle Path Hack + @interface GGMLMetalClass : NSObject +@@ -827,7 +832,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te + }; + + struct ggml_metal_buffer { +- void * all_data; // TODO: https://github.com/ggml-org/llama.cpp/pull/15985 ++ void * all_data; + size_t all_size; + + // if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host +@@ -965,14 +970,15 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size, + if (shared) { + res->all_data = ggml_metal_host_malloc(size_aligned); + res->is_shared = true; +- res->owned = true; + } else { +- // dummy, non-NULL value - we'll populate this after creating the Metal buffer below +- res->all_data = (void *) 0x000000400ULL; ++ // use virtual address from g_addr_device counter ++ res->all_data = (void *) atomic_fetch_add_explicit(&g_addr_device, size_aligned, memory_order_relaxed); + res->is_shared = false; + } + res->all_size = size_aligned; + ++ res->owned = true; ++ + res->device = ggml_metal_device_get_obj(dev); + res->queue = ggml_metal_device_get_queue(dev); + +@@ -983,15 +989,13 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size, + res->buffers[0].metal = nil; + + if (size_aligned > 0) { +- if (props_dev->use_shared_buffers &&shared) { ++ if (props_dev->use_shared_buffers && shared) { + res->buffers[0].metal = [res->device newBufferWithBytesNoCopy:res->all_data + length:size_aligned + options:MTLResourceStorageModeShared + deallocator:nil]; + } else { + res->buffers[0].metal = [res->device newBufferWithLength:size_aligned 
options:MTLResourceStorageModePrivate]; +- +- res->all_data = (void *) (res->buffers[0].metal.gpuAddress); + } + } + +@@ -1139,7 +1143,7 @@ bool ggml_metal_buffer_is_shared(ggml_metal_buffer_t buf) { + + void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { + if (buf->is_shared) { +- memset((char *)tensor->data + offset, value, size); ++ memset((char *) tensor->data + offset, value, size); + return; + } + +@@ -1168,7 +1172,7 @@ void ggml_metal_buffer_memset_tensor(ggml_metal_buffer_t buf, struct ggml_tensor + + void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { + if (buf->is_shared) { +- memcpy((char *)tensor->data + offset, data, size); ++ memcpy((char *) tensor->data + offset, data, size); + return; + } + +@@ -1223,7 +1227,7 @@ void ggml_metal_buffer_set_tensor(ggml_metal_buffer_t buf, struct ggml_tensor * + + void ggml_metal_buffer_get_tensor(ggml_metal_buffer_t buf, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { + if (buf->is_shared) { +- memcpy(data, (const char *)tensor->data + offset, size); ++ memcpy(data, (const char *) tensor->data + offset, size); + return; + } + + +From 84e3d8d26961cca81de65b1790506121dda45bf5 Mon Sep 17 00:00:00 2001 +From: Georgi Gerganov +Date: Tue, 14 Oct 2025 14:44:01 +0300 +Subject: [PATCH 2/2] metal : fix rope kernels buffer check + +--- + ggml/src/ggml-metal/ggml-metal-impl.h | 1 + + ggml/src/ggml-metal/ggml-metal-ops.cpp | 1 + + ggml/src/ggml-metal/ggml-metal.metal | 8 ++++---- + 3 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h +index a448c14f66b63..fa2d82cefb40e 100644 +--- a/ggml/src/ggml-metal/ggml-metal-impl.h ++++ b/ggml/src/ggml-metal/ggml-metal-impl.h +@@ -251,6 +251,7 @@ typedef struct { + int32_t sect_1; + int32_t sect_2; + int32_t 
sect_3; ++ bool src2; + } ggml_metal_kargs_rope; + + typedef struct { +diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp +index a61ea8fb5a7b3..784b7b77851e6 100644 +--- a/ggml/src/ggml-metal/ggml-metal-ops.cpp ++++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp +@@ -2969,6 +2969,7 @@ int ggml_metal_op_rope(ggml_metal_op_t ctx, int idx) { + /* sect_1 =*/ sect_1, + /* sect_2 =*/ sect_2, + /* sect_3 =*/ sect_3, ++ /* src2 =*/ op->src[2] != nullptr, + }; + + ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_rope(lib, op); +diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal +index 1029cf8f9a3ab..6d39ddcc634ef 100644 +--- a/ggml/src/ggml-metal/ggml-metal.metal ++++ b/ggml/src/ggml-metal/ggml-metal.metal +@@ -3748,7 +3748,7 @@ kernel void kernel_rope_norm( + + const float theta = theta_base * pow(args.freq_base, inv_ndims*i0); + +- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f; ++ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f; + + rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta); + +@@ -3801,7 +3801,7 @@ kernel void kernel_rope_neox( + + const float theta = theta_base * pow(args.freq_base, inv_ndims*i0); + +- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f; ++ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f; + + rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta); + +@@ -3872,7 +3872,7 @@ kernel void kernel_rope_multi( + + const float theta = theta_base * pow(args.freq_base, inv_ndims*i0); + +- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f; ++ const float freq_factor = args.src2 ? 
((device const float *) src2)[ic] : 1.0f; + + rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta); + +@@ -3939,7 +3939,7 @@ kernel void kernel_rope_vision( + const float theta = theta_base * pow(args.freq_base, 2.0f * inv_ndims * p); + // end of mrope + +- const float freq_factor = src2 != src0 ? ((device const float *) src2)[ic] : 1.0f; ++ const float freq_factor = args.src2 ? ((device const float *) src2)[ic] : 1.0f; + + rope_yarn(theta/freq_factor, args.freq_scale, corr_dims, i0, args.ext_factor, args.attn_factor, &cos_theta, &sin_theta); + diff --git a/recipe/recipe.yaml b/recipe/recipe.yaml index af341ae..5c721ac 100644 --- a/recipe/recipe.yaml +++ b/recipe/recipe.yaml @@ -11,7 +11,8 @@ source: url: https://github.com/ggml-org/${{ name }}/archive/b${{ version | split(".") | list | last }}.tar.gz sha256: bfe625422c8fa74cf12d1d6aff8bdbbe61c86647de1615c2e7b6f0cde4804e18 patches: - - 0001-Change-gpuAddress-for-contents.patch + # See: https://github.com/ggml-org/llama.cpp/pull/16576/ + - 16576.patch build: number: ${{ build }}