Skip to content

Commit e06947e

Browse files
authored
Merge branch 'branch/3.2.x' into backport-3.2-tensormap-bugfix
2 parents 8baed27 + c6bd25d commit e06947e

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

libcudacxx/include/cuda/__tma/make_tma_descriptor.h

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ enum class tma_swizzle
9494
case tma_oob_fill::nan:
9595
return ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA;
9696
default:
97-
_CCCL_UNREACHABLE();
97+
::cuda::std::unreachable();
9898
}
9999
}
100100

@@ -112,7 +112,7 @@ __to_cutensor_map(tma_l2_fetch_size __l2_fetch_size) noexcept
112112
case tma_l2_fetch_size::bytes256:
113113
return ::CU_TENSOR_MAP_L2_PROMOTION_L2_256B;
114114
default:
115-
_CCCL_UNREACHABLE();
115+
::cuda::std::unreachable();
116116
}
117117
}
118118

@@ -128,7 +128,7 @@ __to_cutensor_map(tma_interleave_layout __interleave_layout) noexcept
128128
case tma_interleave_layout::bytes32:
129129
return ::CU_TENSOR_MAP_INTERLEAVE_32B;
130130
default:
131-
_CCCL_UNREACHABLE();
131+
::cuda::std::unreachable();
132132
}
133133
}
134134

@@ -153,7 +153,7 @@ __to_cutensor_map(tma_interleave_layout __interleave_layout) noexcept
153153
return ::CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B;
154154
# endif // _CCCL_CTK_AT_LEAST(12, 8)
155155
default:
156-
_CCCL_UNREACHABLE();
156+
::cuda::std::unreachable();
157157
}
158158
}
159159

@@ -405,11 +405,11 @@ __get_tensor_sizes(const ::DLTensor& __tensor, int __rank, ::CUtensorMapDataType
405405
{
406406
using ::cuda::std::int64_t;
407407
__tma_strides_array_t __output_strides{1}; // inner stride is implicit = 1
408-
const auto __input_strides = __tensor.strides;
409-
const auto __input_sizes = __tensor.shape;
410-
const auto __alignment = (__interleave_layout == tma_interleave_layout::bytes32) ? 32 : 16;
411-
constexpr auto __max_allowed_stride_bytes = int64_t{1} << 40; // 2^40
412-
int64_t __cumulative_size = 1;
408+
const auto __input_strides = __tensor.strides;
409+
const auto __input_sizes = __tensor.shape;
410+
const auto __alignment = (__interleave_layout == tma_interleave_layout::bytes32) ? 32 : 16;
411+
constexpr auto __max_allowed_stride_bytes = int64_t{1} << 40; // 2^40
412+
[[maybe_unused]] int64_t __cumulative_size = 1;
413413
if (__input_strides == nullptr)
414414
{
415415
for (int __i = 0; __i < __rank - 1; ++__i)

libcudacxx/test/libcudacxx/cuda/tma/make_tma_descriptor.pass.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ bool test_enums()
232232
cuda::tma_swizzle swizzle,
233233
cuda::tma_l2_fetch_size l2_fetch_size,
234234
cuda::tma_oob_fill oobfill) {
235-
tensor.dtype.bits = bits;
235+
tensor.dtype.bits = static_cast<uint8_t>(bits);
236236
box_sizes_storage[0] = /*min_align=*/16 * /*bits=*/8 / tensor.dtype.bits;
237237
box_sizes_storage[1] = /*min_align=*/16 * /*bits=*/8 / tensor.dtype.bits;
238238
box_sizes_storage[2] = /*min_align=*/16 * /*bits=*/8 / tensor.dtype.bits;
@@ -284,7 +284,7 @@ bool test_enums()
284284
kDLFloat8_e5m2fnuz,
285285
kDLFloat8_e8m0fnu})
286286
{
287-
tensor.dtype.code = code;
287+
tensor.dtype.code = static_cast<uint8_t>(code);
288288
constexpr int bits = 8;
289289
exec_make_tma_descriptor(bits, no_interleave, swizzle, l2_fetch_size, oobfill);
290290
}

0 commit comments

Comments
 (0)