Commit 35f3572

Revert "[ROCm] Enable group gemm through CK (pytorch#166334)"
This reverts commit 1fa520e. Reverted pytorch#166334 on behalf of https://github.com/atalman due to Internal build failures ([comment](pytorch#166334 (comment)))
1 parent bc5111c · commit 35f3572

4 files changed: +2 −487 lines

aten/src/ATen/native/cuda/GroupedBlas.cpp

Lines changed: 0 additions & 10 deletions
@@ -22,9 +22,6 @@
 #include <ATen/native/cuda/RowwiseScaledMM.h>
 #include <ATen/native/cuda/ScaledGroupMM.h>
 #include <ATen/native/cuda/GroupMM.h>
-#ifdef USE_ROCM
-#include <ATen/native/hip/ck_group_gemm.h>
-#endif
 #include <ATen/ceil_div.h>
 
 #ifdef USE_FBGEMM_GENAI
@@ -639,19 +636,12 @@ std::optional<c10::ScalarType> out_dtype) {
   // _scaled_mm_allowed_device is used here within _grouped_mm_cuda which seems incorrect since scale is not used.
   // the _grouped_mm_fallback should be safe for any ROCm GPU since it's just calling typical mm/bmm
   bool use_fast_path = false;
-  if (at::detail::getCUDAHooks().isGPUArch({"gfx942", "gfx950"})) {
-    use_fast_path = true;
-  }
 #endif
   const auto out_dtype_ = _resolve_grouped_mm_out_dtype(mat_a, mat_b, out_dtype);
   Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype_);
   if (use_fast_path) {
     // fast path, no d2h sync needed
-#ifndef USE_ROCM
     at::cuda::detail::bf16bf16_grouped_mm(mat_a, mat_b, offs, bias, out);
-#else
-    at::hip::detail::group_gemm_ck(mat_a, mat_b, offs, bias, out);
-#endif
   } else {
     _grouped_mm_fallback(mat_a, mat_b, offs, bias, out_dtype, out);
   }
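
With the revert applied, use_fast_path stays false on ROCm, so grouped GEMMs there take the _grouped_mm_fallback branch, which the in-tree comment describes as "just calling typical mm/bmm". Below is a minimal sketch of what such a per-group fallback can look like, assuming mat_a is [total_rows, K], mat_b is [G, K, N], and offs holds cumulative row offsets; grouped_mm_fallback_sketch is a hypothetical name, not PyTorch's actual _grouped_mm_fallback:

#include <ATen/ATen.h>

// Hypothetical sketch, not PyTorch's _grouped_mm_fallback: run one plain
// at::mm per group, slicing rows of mat_a/out by the offsets in offs.
void grouped_mm_fallback_sketch(
    const at::Tensor& mat_a,   // [total_rows, K]
    const at::Tensor& mat_b,   // [G, K, N]
    const at::Tensor& offs,    // [G] cumulative row offsets (int32)
    at::Tensor& out) {         // preallocated [total_rows, N]
  // Reading the offsets on the host forces a device-to-host copy; this is
  // the d2h sync the fast path in the diff above avoids.
  const auto offs_cpu = offs.cpu();
  const auto* offs_ptr = offs_cpu.data_ptr<int32_t>();
  int64_t start = 0;
  for (int64_t g = 0; g < offs_cpu.numel(); ++g) {
    const int64_t stop = offs_ptr[g];
    if (stop > start) {
      // one ordinary mm per group; slices alias mat_a/out storage
      auto out_g = out.slice(/*dim=*/0, start, stop);
      at::mm_out(out_g, mat_a.slice(/*dim=*/0, start, stop), mat_b[g]);
    }
    start = stop;
  }
}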

aten/src/ATen/native/hip/ck_group_gemm.h

Lines changed: 0 additions & 19 deletions
This file was deleted.
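
The call site removed above invoked at::hip::detail::group_gemm_ck(mat_a, mat_b, offs, bias, out), so the deleted 19-line header presumably declared roughly the following. This is a reconstruction from the call site; the exact parameter types are assumptions:

#pragma once

#include <ATen/core/Tensor.h>
#include <optional>

namespace at::hip::detail {

// Reconstructed from the call site in GroupedBlas.cpp; the precise
// signature of the deleted declaration is an assumption.
void group_gemm_ck(
    const at::Tensor& mat_a,
    const at::Tensor& mat_b,
    const std::optional<at::Tensor>& offs,
    const std::optional<at::Tensor>& bias,
    at::Tensor& out);

} // namespace at::hip::detail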
