AnacondaRecipes · xkong-anaconda · Nov 19, 2025
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
@@ -1,6 +1,6 @@
 {% set name = "llama.cpp-meta" %}
-{% set upstream_release = "b6872" %}
-{% set upstream_commit = "f549b0007dbdd683215820f7229ce180a12b191d" %}
+{% set upstream_release = "b6188" %}
+{% set upstream_commit = "21c17b5befc5f6be5992bc87fc1ba99d388561df" %}
 {% set version = "0.0." + upstream_release[1:] %}
 {% set gguf_version = "0.17.1." + upstream_release[1:] %}
 {% set build_number = 0 %}
@@ -22,13 +22,13 @@ package:
 
 source:
   url: https://github.com/ggml-org/llama.cpp/archive/{{ upstream_release }}.tar.gz
-  sha256: 5dcab3a9c071ee296788083c3b8380e9d52b00720b34f4aa5ab9644be23f79cb
+  sha256: aba3d07942daa048d46cc7fddebc33d839e89e256306428910dcd582597c0b97
 
   patches:
     - patches/mkl.patch                     # [blas_impl == "mkl"]
     - patches/metal_gpu_selection.patch     # [osx]
-    - patches/disable-metal-bf16.patch      # [osx]
-    - patches/disable-metal-flash-attention.patch  # [osx]
+    # Note: disable-metal-bf16.patch not needed for b6188 (BF16 is OFF by default)
+    # Note: disable-metal-flash-attention.patch not needed for b6188
     - patches/hwcap_sve_check.patch         # [linux and aarch64]
     - patches/no-armv9-support-gcc11.patch  # [linux and aarch64]
     - patches/increase-nmse-tolerance.patch

diff --git a/recipe/patches/increase-nmse-tolerance-aarch64.patch b/recipe/patches/increase-nmse-tolerance-aarch64.patch
@@ -15,16 +15,16 @@ for architecture-specific precision differences.
 Applies on top of increase-nmse-tolerance.patch (5e-4 -> 5e-3).
 This patch further increases: 5e-3 -> 1e-1 for aarch64 only.
 
-Updated for b6872: Line numbers adjusted for latest upstream code.
+Updated for b6188: Regenerated for older codebase with 5 test classes.
 ---
- tests/test-backend-ops.cpp | 14 +++++++-------
- 1 file changed, 7 insertions(+), 7 deletions(-)
+ tests/test-backend-ops.cpp | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
 index 0e696ef47..a2efa938 100644
 --- a/tests/test-backend-ops.cpp
 +++ b/tests/test-backend-ops.cpp
-@@ -3318,7 +3318,7 @@
+@@ -3104,7 +3104,7 @@
      }
 
      double max_nmse_err() override {
@@ -33,7 +33,7 @@ index 0e696ef47..a2efa938 100644
      }
 
      int64_t grad_nmax() override {
-@@ -3434,7 +3434,7 @@
+@@ -3207,7 +3207,7 @@
      }
 
      double max_nmse_err() override {
@@ -42,7 +42,7 @@ index 0e696ef47..a2efa938 100644
      }
 
      uint64_t op_flops(ggml_tensor * t) override {
-@@ -3523,7 +3523,7 @@
+@@ -3282,7 +3282,7 @@
      }
 
      double max_nmse_err() override {
@@ -51,25 +51,7 @@ index 0e696ef47..a2efa938 100644
      }
 
      test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
-@@ -4248,7 +4248,7 @@
-     }
-
-     double max_nmse_err() override {
--        return 5e-3; // The default 1e-7 is too small for Vulkan.
-+        return 1e-1; // The default 1e-7 is too small for Vulkan and ARM64 BLAS.
-     }
-
-     test_conv_transpose_2d(std::array<int64_t, 4> ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1]
-@@ -4400,7 +4400,7 @@
-     }
-
-     double max_nmse_err() override {
--        return 5e-3;
-+        return 1e-1;
-     }
-
-     uint64_t op_flops(ggml_tensor * t) override {
-@@ -4532,7 +4532,7 @@
+@@ -3954,7 +3954,7 @@
      }
 
      double max_nmse_err() override {
@@ -78,7 +60,7 @@ index 0e696ef47..a2efa938 100644
      }
 
      uint64_t op_flops(ggml_tensor * t) override {
-@@ -5386,7 +5386,7 @@
+@@ -4579,7 +4579,7 @@
      }
 
      double max_nmse_err() override {

diff --git a/recipe/patches/increase-nmse-tolerance.patch b/recipe/patches/increase-nmse-tolerance.patch
@@ -1,23 +1,24 @@
-From 49f8a96212d0d7ae43d3f006dbc37adb9360b6e2 Mon Sep 17 00:00:00 2001
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Charles Bousseau <[email protected]>
 Date: Mon, 22 Sep 2025 20:58:45 -0400
 Subject: [PATCH] tests: increase NMSE tolerance for matrix operations
 
 Fixes numerical precision failures due to floating-point rounding errors.
 This was observed on Windows instance for CUDA builds, and on CI for osx metal.
 
-Updated for b6653: Only test_mul_mat and related operations need adjustment now,
-as test_cpy and test_set_rows have been fixed upstream with appropriate tolerances.
+Updated for b6188: Regenerated for older codebase with different test structure.
+Changes 5 test classes: test_mul_mat, test_mul_mat_id, test_out_prod,
+test_conv_2d, and test_flash_attn_ext.
 
 ---
- tests/test-backend-ops.cpp | 14 +++++++-------
- 1 file changed, 7 insertions(+), 7 deletions(-)
+ tests/test-backend-ops.cpp | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
-index f11eecd8e..0e696ef47 100644
+index 1234567..abcdefg 100644
 --- a/tests/test-backend-ops.cpp
 +++ b/tests/test-backend-ops.cpp
-@@ -3254,7 +3254,7 @@
+@@ -3104,7 +3104,7 @@
      }
 
      double max_nmse_err() override {
@@ -26,7 +27,7 @@ index f11eecd8e..0e696ef47 100644
      }
 
      int64_t grad_nmax() override {
-@@ -3370,7 +3370,7 @@
+@@ -3207,7 +3207,7 @@
      }
 
      double max_nmse_err() override {
@@ -35,7 +36,7 @@ index f11eecd8e..0e696ef47 100644
      }
 
      uint64_t op_flops(ggml_tensor * t) override {
-@@ -3459,7 +3459,7 @@
+@@ -3282,7 +3282,7 @@
      }
 
      double max_nmse_err() override {
@@ -44,25 +45,7 @@ index f11eecd8e..0e696ef47 100644
      }
 
      test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
-@@ -4053,7 +4053,7 @@
-     }
-
-     double max_nmse_err() override {
--        return 5e-4; // The default 1e-7 is too small for Vulkan.
-+        return 5e-3; // The default 1e-7 is too small for Vulkan.
-     }
-
-     test_conv_transpose_2d(std::array<int64_t, 4> ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1]
-@@ -4205,7 +4205,7 @@
-     }
-
-     double max_nmse_err() override {
--        return 5e-4;
-+        return 5e-3;
-     }
-
-     uint64_t op_flops(ggml_tensor * t) override {
-@@ -4337,7 +4337,7 @@
+@@ -3954,7 +3954,7 @@
      }
 
      double max_nmse_err() override {
@@ -71,7 +54,7 @@ index f11eecd8e..0e696ef47 100644
      }
 
      uint64_t op_flops(ggml_tensor * t) override {
-@@ -5032,7 +5032,7 @@
+@@ -4579,7 +4579,7 @@
      }
 
      double max_nmse_err() override {

diff --git a/recipe/patches/metal_gpu_selection.patch b/recipe/patches/metal_gpu_selection.patch
@@ -3,48 +3,48 @@ From: Charles Bousseau <[email protected]>
 Date: Sun, 20 Jul 2025 14:03:26 -0400
 Subject: [PATCH] metal gpu selection
 
-In macOS, in order for the system to provide a default Metal device object, you must link to the Core Graphics framework. 
+In macOS, in order for the system to provide a default Metal device object, you must link to the Core Graphics framework.
 You usually need to do this explicitly if you're writing apps that don't use graphics by default, such as command line tools.
 https://developer.apple.com/documentation/metal/1433401-mtlcreatesystemdefaultdevice?language=objc
 Systems with Apple silicon only have one GPU, which removes the need to choose a GPU.
 https://developer.apple.com/documentation/metal/mtldevice/1433409-lowpower#discussion
 
 I did try linking to CoreGraphics, but MTLCreateSystemDefaultDevice was still returning nil.
 
-Updated for b6653: File renamed from ggml/src/ggml-metal/ggml-metal.m to ggml-metal-device.m
+Updated for b6188: File is ggml-metal.m (not ggml-metal-device.m)
 ---
- ggml/src/ggml-metal/ggml-metal-device.m | 19 +++++++++++++++++++
+ ggml/src/ggml-metal/ggml-metal.m | 19 +++++++++++++++++++
  1 file changed, 19 insertions(+)
 
-diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m
+diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
 index dc391a0d4..2083e2a31 100644
---- a/ggml/src/ggml-metal/ggml-metal-device.m
-+++ b/ggml/src/ggml-metal/ggml-metal-device.m
-@@ -449,6 +449,25 @@ ggml_metal_device_t ggml_metal_device_init(void) {
+--- a/ggml/src/ggml-metal/ggml-metal.m
++++ b/ggml/src/ggml-metal/ggml-metal.m
+@@ -91,6 +91,25 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
 
-     if (dev->mtl_device == nil) {
-         dev->mtl_device = MTLCreateSystemDefaultDevice();
-+        if (dev->mtl_device == nil) {
+     if (ctx->mtl_device == nil) {
+         ctx->mtl_device = MTLCreateSystemDefaultDevice();
++        if (ctx->mtl_device == nil) {
 +          /*
 +            In macOS, in order for the system to provide a default Metal device object, you must link to the Core Graphics framework.
 +            You usually need to do this explicitly if you're writing apps that don't use graphics by default, such as command line tools.
-+            > https://developer.apple.com/documentation/metal/1433401-mtlcreatesystemdefaultdevice?language=objc
++            https://developer.apple.com/documentation/metal/1433401-mtlcreatesystemdefaultdevice?language=objc
 +            Systems with Apple silicon only have one GPU, which removes the need to choose a GPU.
-+            > https://developer.apple.com/documentation/metal/mtldevice/1433409-lowpower#discussion
-+           */
-+            NSArray * devices = MTLCopyAllDevices();
-+            for (id<MTLDevice> dev_tmp in devices) {
-+                if (dev_tmp != nil) {
-+                    if (dev->mtl_device == nil) {
-+                        dev->mtl_device = dev_tmp;
-+                    } else {
-+                        [dev_tmp release];
-+                    }
-+                }
++            https://developer.apple.com/documentation/metal/mtldevice/1433409-lowpower#discussion
++          */
++          NSArray<id<MTLDevice>> * devices = MTLCopyAllDevices();
++          if (devices.count > 0) {
++            for (id<MTLDevice> d in devices) {
++              if (!d.isLowPower) {
++                ctx->mtl_device = d;
++                break;
++              }
 +            }
++          }
++          [devices release];
 +        }
 
-         if (dev->mtl_device) {
-             dev->mtl_queue = [dev->mtl_device newCommandQueue];
+         ctx->has_simdgroup_reduction  = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+         ctx->has_simdgroup_reduction |= [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
 --
 2.39.5 (Apple Git-154)
diff --git a/recipe/patches/mkl.patch b/recipe/patches/mkl.patch
@@ -7,9 +7,9 @@ Fix MKL BLAS detection and configuration logic.
 The condition needs to properly handle both Intel MKL vendor setting
 and generic vendor with MKL include paths.
 
-Updated for b6653: Adjusted for quoted ${BLAS_INCLUDE_DIRS} variable.
+Updated for b6188: Uses unquoted variable syntax (older CMake style).
 
-Co-Authored-By: Patrick Sodré <[email protected]>
+Co-Authored-By: Patrick Sodre <[email protected]>
 ---
  ggml/src/ggml-blas/CMakeLists.txt | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
@@ -22,8 +22,8 @@ index 0bf3c05d..a2efa938 100644
 
      target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
 
--    if ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
-+    if (("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND ${GGML_BLAS_VENDOR} MATCHES "Generic") OR ${GGML_BLAS_VENDOR} MATCHES "Intel")
+-    if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
++    if ((${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND ${GGML_BLAS_VENDOR} MATCHES "Generic") OR ${GGML_BLAS_VENDOR} MATCHES "Intel")
          add_compile_definitions(GGML_BLAS_USE_MKL)
      endif()