Add CUDA 12.8 support patch (0040) and disable patches 0033-0035

isuruf · isuruf · commit 9e5fe3654af5 · 2025-08-06T19:36:22.000Z
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
@@ -62,15 +62,16 @@ source:
       - patches/0032-Remove-ambiguous-inherited-constructor-in-default_qu.patch
       # these came from https://git.yoctoproject.org/meta-tensorflow/tree/recipes-framework/tensorflow/files
       # check every release there for patches in the future
-      - patches/0033-third_party-ducc-fix-ambiguous-failure.patch
-      - patches/0034-third_party-tf_runtime-fix-compile-failure.patch
-      - patches/0035-support-to-build-with-gcc-15.patch
+      # - patches/0033-third_party-ducc-fix-ambiguous-failure.patch
+      # - patches/0034-third_party-tf_runtime-fix-compile-failure.patch
+      # - patches/0035-support-to-build-with-gcc-15.patch
       - patches/0036-third_party-eigen_archive-workaround-ice-failure-whi.patch
       # for our system absl
       - patches/0037-add-absl_tracing_internal.patch
       # for the megabuild
       - patches/0038-Fix-building-different-python-wheels-from-one-python.patch
       - patches/0039-Fix-matmul-unused-result-error.patch
+      - patches/0040-Support-cuda-12.8.patch
   - url: https://github.com/tensorflow/estimator/archive/refs/tags/v{{ estimator_version.replace(".rc", "-rc") }}.tar.gz
     sha256: 2d7e100b1878084da34b5e23b49a0cbb5ee8a7add74b7dd189a82ada1cf85530
     folder: tensorflow-estimator
diff --git a/recipe/patches/0040-Support-cuda-12.8.patch b/recipe/patches/0040-Support-cuda-12.8.patch
@@ -0,0 +1,40 @@
+From c0be238cae714bd5e3ad67554bdf8068026073a1 Mon Sep 17 00:00:00 2001
+From: oqs <2227-loqs@users.noreply.gitlab.archlinux.org>
+Date: Wed, 6 Aug 2025 19:34:12 +0000
+Subject: [PATCH 40/40] Support cuda 12.8
+
+---
+ tensorflow/core/kernels/gpu_prim.h | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+diff --git a/tensorflow/core/kernels/gpu_prim.h b/tensorflow/core/kernels/gpu_prim.h
+index bef22b50..f80bd54d 100644
+--- a/tensorflow/core/kernels/gpu_prim.h
++++ b/tensorflow/core/kernels/gpu_prim.h
+@@ -44,10 +44,9 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr<Eigen::half>(
+       Eigen::numext::bit_cast<uint16_t>(val);
+ }
+ 
+-template <>
+-__device__ __forceinline__ Eigen::half ThreadLoadVolatilePointer<Eigen::half>(
+-    Eigen::half *ptr, Int2Type<true> /*is_primitive*/) {
+-  uint16_t result = *reinterpret_cast<volatile uint16_t *>(ptr);
++__device__ __forceinline__ Eigen::half ThreadLoadVolatilePointer(
++    const Eigen::half *ptr, Int2Type<true> /*is_primitive*/) {
++  uint16_t result = *reinterpret_cast<volatile const uint16_t *>(ptr);
+   return Eigen::numext::bit_cast<Eigen::half>(result);
+ }
+ 
+@@ -59,10 +58,8 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr<Eigen::bfloat16>(
+       Eigen::numext::bit_cast<uint16_t>(val);
+ }
+ 
+-template <>
+-__device__ __forceinline__ Eigen::bfloat16
+-ThreadLoadVolatilePointer<Eigen::bfloat16>(Eigen::bfloat16 *ptr,
+-                                           Int2Type<true> /*is_primitive*/) {
++__device__ __forceinline__ Eigen::bfloat16 ThreadLoadVolatilePointer(
++    Eigen::bfloat16 *ptr, Int2Type<true> /*is_primitive*/) {
+   uint16_t result = *reinterpret_cast<volatile uint16_t *>(ptr);
+   return Eigen::numext::bit_cast<Eigen::bfloat16>(result);
+ }