Skip to content

Commit 6af5a46

Browse files
authored
Merge branch 'branch/3.2.x' into backport-7034-to-branch/3.2.x
2 parents d281ab1 + c6bd25d commit 6af5a46

File tree

2 files changed: 12 additions (+12) and 11 deletions (-11)

libcudacxx/include/cuda/__tma/make_tma_descriptor.h

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
 # include <cuda/std/__algorithm/min.h>
 # include <cuda/std/__cstddef/types.h>
 # include <cuda/std/__limits/numeric_limits.h>
+# include <cuda/std/__utility/unreachable.h>
 # include <cuda/std/array>
 # include <cuda/std/cstdint>
 # include <cuda/std/span>
@@ -94,7 +95,7 @@ enum class tma_swizzle
 case tma_oob_fill::nan:
 return ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA;
 default:
-_CCCL_UNREACHABLE();
+::cuda::std::unreachable();
 }
 }

@@ -112,7 +113,7 @@ __to_cutensor_map(tma_l2_fetch_size __l2_fetch_size) noexcept
 case tma_l2_fetch_size::bytes256:
 return ::CU_TENSOR_MAP_L2_PROMOTION_L2_256B;
 default:
-_CCCL_UNREACHABLE();
+::cuda::std::unreachable();
 }
 }

@@ -128,7 +129,7 @@ __to_cutensor_map(tma_interleave_layout __interleave_layout) noexcept
 case tma_interleave_layout::bytes32:
 return ::CU_TENSOR_MAP_INTERLEAVE_32B;
 default:
-_CCCL_UNREACHABLE();
+::cuda::std::unreachable();
 }
 }

@@ -153,7 +154,7 @@ __to_cutensor_map(tma_interleave_layout __interleave_layout) noexcept
 return ::CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B;
 # endif // _CCCL_CTK_AT_LEAST(12, 8)
 default:
-_CCCL_UNREACHABLE();
+::cuda::std::unreachable();
 }
 }

@@ -366,11 +367,11 @@ __get_tensor_sizes(const ::DLTensor& __tensor, int __rank, ::CUtensorMapDataType
 {
 using ::cuda::std::int64_t;
 __tma_strides_array_t __output_strides{1}; // inner stride is implicit = 1
-const auto __input_strides = __tensor.strides;
-const auto __input_sizes = __tensor.shape;
-const auto __alignment = (__interleave_layout == tma_interleave_layout::bytes32) ? 32 : 16;
-constexpr auto __max_allowed_stride_bytes = int64_t{1} << 40; // 2^40
-int64_t __cumulative_size = 1;
+const auto __input_strides = __tensor.strides;
+const auto __input_sizes = __tensor.shape;
+const auto __alignment = (__interleave_layout == tma_interleave_layout::bytes32) ? 32 : 16;
+constexpr auto __max_allowed_stride_bytes = int64_t{1} << 40; // 2^40
+[[maybe_unused]] int64_t __cumulative_size = 1;
 if (__input_strides == nullptr)
 {
 for (int __i = 0; __i < __rank - 1; ++__i)

libcudacxx/test/libcudacxx/cuda/tma/make_tma_descriptor.pass.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ bool test_enums()
 cuda::tma_swizzle swizzle,
 cuda::tma_l2_fetch_size l2_fetch_size,
 cuda::tma_oob_fill oobfill) {
-tensor.dtype.bits = bits;
+tensor.dtype.bits = static_cast<uint8_t>(bits);
 box_sizes_storage[0] = /*min_align=*/16 * /*bits=*/8 / tensor.dtype.bits;
 box_sizes_storage[1] = /*min_align=*/16 * /*bits=*/8 / tensor.dtype.bits;
 box_sizes_storage[2] = /*min_align=*/16 * /*bits=*/8 / tensor.dtype.bits;
@@ -284,7 +284,7 @@ bool test_enums()
 kDLFloat8_e5m2fnuz,
 kDLFloat8_e8m0fnu})
 {
-tensor.dtype.code = code;
+tensor.dtype.code = static_cast<uint8_t>(code);
 constexpr int bits = 8;
 exec_make_tma_descriptor(bits, no_interleave, swizzle, l2_fetch_size, oobfill);
 }

0 commit comments

Comments
 (0)