File tree Expand file tree Collapse file tree 1 file changed +4
-4
lines changed Expand file tree Collapse file tree 1 file changed +4
-4
lines changed Original file line number Diff line number Diff line change @@ -127,7 +127,7 @@ __device__ __forceinline__ T from_float(const float& inp) {
127
127
128
128
template <typename T>
129
129
__device__ __forceinline__ _B16x4 from_floatx4 (const floatx4& inp) {
130
- union tmpcvt {
130
+ [[maybe_unused]] union tmpcvt {
131
131
uint16_t u;
132
132
_Float16 f;
133
133
__hip_bfloat16 b;
@@ -160,7 +160,7 @@ __device__ __forceinline__ _B16x4 from_floatx4(const floatx4& inp) {
160
160
template <typename T>
161
161
__device__ __forceinline__ _B16x4 addx4 (const _B16x4& inp1,
162
162
const _B16x4& inp2) {
163
- union tmpcvt {
163
+ [[maybe_unused]] union tmpcvt {
164
164
uint16_t u;
165
165
_Float16 f;
166
166
__hip_bfloat16 b;
@@ -1273,9 +1273,9 @@ __launch_bounds__(NUM_THREADS) void paged_attention_ll4mi_reduce_kernel(
1273
1273
const int seq_idx = blockIdx .y ;
1274
1274
const int context_len = context_lens[seq_idx];
1275
1275
const int num_partitions = DIVIDE_ROUND_UP (context_len, PARTITION_SIZE);
1276
- constexpr int NUM_WARPS = NUM_THREADS / WARP_SIZE;
1276
+ [[maybe_unused]] constexpr int NUM_WARPS = NUM_THREADS / WARP_SIZE;
1277
1277
const int warpid = threadIdx .x / WARP_SIZE;
1278
- const int laneid = threadIdx .x % WARP_SIZE;
1278
+ [[maybe_unused]] const int laneid = threadIdx .x % WARP_SIZE;
1279
1279
1280
1280
__shared__ float shared_global_exp_sum;
1281
1281
// max num partitions supported is warp_size * NPAR_LOOPS
You can’t perform that action at this time.
0 commit comments