Skip to content

Commit b5ade22

Browse files
authored
fix dynamic_per_group_scaled_quant_perf_drop due to amd_buffer_coherence_enum (ROCm#1769)
1 parent 2a2b303 commit b5ade22

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

csrc/kernels/quant_kernels.cu

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
// SPDX-License-Identifier: MIT
2-
// Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
2+
// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.
33

44
#include "aiter_hip_common.h"
55
#include "dispatch_utils.h"
@@ -134,8 +134,7 @@ dynamic_per_group_scaled_quant_kernel(DTYPE_O* __restrict__ out,
134134
using DTYPE_STORE = typename ck_tile::vector_traits<DTYPE_O>::scalar_type;
135135
auto* out_ptr = reinterpret_cast<DTYPE_STORE*>(out);
136136
auto buffer_o =
137-
ck_tile::make_buffer_view<ck_tile::address_space_enum::global,
138-
ck_tile::amd_buffer_coherence_enum::glc>(out_ptr, oob_o);
137+
ck_tile::make_buffer_view<ck_tile::address_space_enum::global>(out_ptr, oob_o);
139138
buffer_o.init_raw();
140139

141140
auto out_s =

0 commit comments

Comments
 (0)