2929# include < cuda/std/__algorithm/min.h>
3030# include < cuda/std/__cstddef/types.h>
3131# include < cuda/std/__limits/numeric_limits.h>
32+ # include < cuda/std/__utility/unreachable.h>
3233# include < cuda/std/array>
3334# include < cuda/std/cstdint>
3435# include < cuda/std/span>
@@ -94,7 +95,7 @@ enum class tma_swizzle
9495 case tma_oob_fill::nan:
9596 return ::CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA;
9697 default :
97- _CCCL_UNREACHABLE ();
98+ ::cuda::std::unreachable ();
9899 }
99100}
100101
@@ -112,7 +113,7 @@ __to_cutensor_map(tma_l2_fetch_size __l2_fetch_size) noexcept
112113 case tma_l2_fetch_size::bytes256:
113114 return ::CU_TENSOR_MAP_L2_PROMOTION_L2_256B;
114115 default :
115- _CCCL_UNREACHABLE ();
116+ ::cuda::std::unreachable ();
116117 }
117118}
118119
@@ -128,7 +129,7 @@ __to_cutensor_map(tma_interleave_layout __interleave_layout) noexcept
128129 case tma_interleave_layout::bytes32:
129130 return ::CU_TENSOR_MAP_INTERLEAVE_32B;
130131 default :
131- _CCCL_UNREACHABLE ();
132+ ::cuda::std::unreachable ();
132133 }
133134}
134135
@@ -153,7 +154,7 @@ __to_cutensor_map(tma_interleave_layout __interleave_layout) noexcept
153154 return ::CU_TENSOR_MAP_SWIZZLE_128B_ATOM_64B;
154155# endif // _CCCL_CTK_AT_LEAST(12, 8)
155156 default :
156- _CCCL_UNREACHABLE ();
157+ ::cuda::std::unreachable ();
157158 }
158159}
159160
@@ -366,11 +367,11 @@ __get_tensor_sizes(const ::DLTensor& __tensor, int __rank, ::CUtensorMapDataType
366367{
367368 using ::cuda::std::int64_t ;
368369 __tma_strides_array_t __output_strides{1 }; // inner stride is implicit = 1
369- const auto __input_strides = __tensor.strides ;
370- const auto __input_sizes = __tensor.shape ;
371- const auto __alignment = (__interleave_layout == tma_interleave_layout::bytes32) ? 32 : 16 ;
372- constexpr auto __max_allowed_stride_bytes = int64_t {1 } << 40 ; // 2^40
373- int64_t __cumulative_size = 1 ;
370+ const auto __input_strides = __tensor.strides ;
371+ const auto __input_sizes = __tensor.shape ;
372+ const auto __alignment = (__interleave_layout == tma_interleave_layout::bytes32) ? 32 : 16 ;
373+ constexpr auto __max_allowed_stride_bytes = int64_t {1 } << 40 ; // 2^40
374+ [[maybe_unused]] int64_t __cumulative_size = 1 ;
374375 if (__input_strides == nullptr )
375376 {
376377 for (int __i = 0 ; __i < __rank - 1 ; ++__i)
0 commit comments