Skip to content

Commit 9e5fe36

Browse files
committed
cuda 12.8 patch
1 parent 361420d commit 9e5fe36

File tree

2 files changed

+44
-3
lines changed

recipe/meta.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,16 @@ source:
     - patches/0032-Remove-ambiguous-inherited-constructor-in-default_qu.patch
     # these came from https://git.yoctoproject.org/meta-tensorflow/tree/recipes-framework/tensorflow/files
     # check every release there for patches in the future
-    - patches/0033-third_party-ducc-fix-ambiguous-failure.patch
-    - patches/0034-third_party-tf_runtime-fix-compile-failure.patch
-    - patches/0035-support-to-build-with-gcc-15.patch
+    # - patches/0033-third_party-ducc-fix-ambiguous-failure.patch
+    # - patches/0034-third_party-tf_runtime-fix-compile-failure.patch
+    # - patches/0035-support-to-build-with-gcc-15.patch
     - patches/0036-third_party-eigen_archive-workaround-ice-failure-whi.patch
     # for our system absl
     - patches/0037-add-absl_tracing_internal.patch
     # for the megabuild
     - patches/0038-Fix-building-different-python-wheels-from-one-python.patch
     - patches/0039-Fix-matmul-unused-result-error.patch
+    - patches/0040-Support-cuda-12.8.patch
   - url: https://github.com/tensorflow/estimator/archive/refs/tags/v{{ estimator_version.replace(".rc", "-rc") }}.tar.gz
     sha256: 2d7e100b1878084da34b5e23b49a0cbb5ee8a7add74b7dd189a82ada1cf85530
     folder: tensorflow-estimator
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
+From c0be238cae714bd5e3ad67554bdf8068026073a1 Mon Sep 17 00:00:00 2001
+From: oqs <2227-loqs@users.noreply.gitlab.archlinux.org>
+Date: Wed, 6 Aug 2025 19:34:12 +0000
+Subject: [PATCH 40/40] Support cuda 12.8
+
+---
+ tensorflow/core/kernels/gpu_prim.h | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+diff --git a/tensorflow/core/kernels/gpu_prim.h b/tensorflow/core/kernels/gpu_prim.h
+index bef22b50..f80bd54d 100644
+--- a/tensorflow/core/kernels/gpu_prim.h
++++ b/tensorflow/core/kernels/gpu_prim.h
+@@ -44,10 +44,9 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr<Eigen::half>(
+       Eigen::numext::bit_cast<uint16_t>(val);
+ }
+
+-template <>
+-__device__ __forceinline__ Eigen::half ThreadLoadVolatilePointer<Eigen::half>(
+-    Eigen::half *ptr, Int2Type<true> /*is_primitive*/) {
+-  uint16_t result = *reinterpret_cast<volatile uint16_t *>(ptr);
++__device__ __forceinline__ Eigen::half ThreadLoadVolatilePointer(
++    const Eigen::half *ptr, Int2Type<true> /*is_primitive*/) {
++  uint16_t result = *reinterpret_cast<volatile const uint16_t *>(ptr);
+   return Eigen::numext::bit_cast<Eigen::half>(result);
+ }
+
+@@ -59,10 +58,8 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr<Eigen::bfloat16>(
+       Eigen::numext::bit_cast<uint16_t>(val);
+ }
+
+-template <>
+-__device__ __forceinline__ Eigen::bfloat16
+-ThreadLoadVolatilePointer<Eigen::bfloat16>(Eigen::bfloat16 *ptr,
+-                                           Int2Type<true> /*is_primitive*/) {
++__device__ __forceinline__ Eigen::bfloat16 ThreadLoadVolatilePointer(
++    Eigen::bfloat16 *ptr, Int2Type<true> /*is_primitive*/) {
+   uint16_t result = *reinterpret_cast<volatile uint16_t *>(ptr);
+   return Eigen::numext::bit_cast<Eigen::bfloat16>(result);
+ }

0 commit comments

Comments
 (0)