Refactor: extract targetSupportsGlobalLoadDMA helper to deduplicate the gfx950+ chipset check

lialan · lialan · commit 198bdd3c3755 · 2026-01-23T11:33:55.000-08:00
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp
@@ -46,6 +46,16 @@ namespace mlir::iree_compiler::IREE::GPU {
 constexpr int64_t kCacheLineSizeBits = 128 * 8;
 constexpr int64_t kPreferredCopyNumBits = 128;
 
+bool targetSupportsGlobalLoadDMA(IREE::GPU::TargetAttr target) {
+  // gfx950 is the oldest chipset accepted; any chipset that compares
+  // greater-or-equal to it also qualifies.
+  constexpr amdgpu::Chipset kMinDmaChipset{9, 5, 0};
+  // An arch string that does not parse as an AMDGPU chipset is reported as
+  // lacking support rather than as an error.
+  auto parsedChipset = amdgpu::Chipset::parse(target.getArch());
+  return succeeded(parsedChipset) && *parsedChipset >= kMinDmaChipset;
+}
+
 //===----------------------------------------------------------------------===//
 // Lowering Config Selection
 //===----------------------------------------------------------------------===//
@@ -927,19 +937,10 @@ getMatmulOrIGEMMLoweringConfigAndWorkgroupSize(
       {"subgroup", b.getI64ArrayAttr(subgroupTileSizes)},
       {"mma_kind", kind}};
 
-  // Check if target supports global load DMA (gfx950+).
-  bool supportsGlobalLoadDMA = false;
-  StringRef targetArch = target.getArch();
-  if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
-      succeeded(maybeChipset)) {
-    constexpr amdgpu::Chipset kGfx950{9, 5, 0};
-    supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
-  }
-
   // Use global load DMA attribute (subgroup sizes will be derived from
   // translation_info) only on gfx950+.
   SmallVector<Attribute> promotionArray;
-  if (supportsGlobalLoadDMA) {
+  if (targetSupportsGlobalLoadDMA(target)) {
     Attribute useGlobalDma = IREE::GPU::UseGlobalLoadDMAAttr::get(context);
     promotionArray = {useGlobalDma, useGlobalDma};
   }
@@ -1090,20 +1091,11 @@ setIGEMMConvolutionLoweringConfig(IREE::GPU::TargetAttr target,
   std::array<int64_t, 3> workgroupSize = {configAndWgSize->second, 1, 1};
   LoweringConfigAttr loweringConfig = configAndWgSize->first;
 
-  // Check if target supports global load DMA (gfx950+). Only disable bank
-  // conflict reduction for targets that will use direct load DMA.
-  bool supportsGlobalLoadDMA = false;
-  StringRef targetArch = target.getArch();
-  if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
-      succeeded(maybeChipset)) {
-    constexpr amdgpu::Chipset kGfx950{9, 5, 0};
-    supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
-  }
-
   SmallVector<NamedAttribute, 1> pipelineAttrs;
   auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
       linalgOp->getContext(), /*prefetchNumStages=*/2,
-      /*no_reduce_shared_memory_bank_conflicts=*/supportsGlobalLoadDMA,
+      /*no_reduce_shared_memory_bank_conflicts=*/
+      targetSupportsGlobalLoadDMA(target),
       /*use_igemm_convolution=*/true,
       /*reorder_workgroups_strategy=*/std::nullopt);
   pipelineAttrs.emplace_back(
@@ -1168,20 +1160,11 @@ LogicalResult setMatmulLoweringConfig(IREE::GPU::TargetAttr target,
   std::array<int64_t, 3> workgroupSize = {configAndWgSize->second, 1, 1};
   LoweringConfigAttr loweringConfig = configAndWgSize->first;
 
-  // Check if target supports global load DMA (gfx950+). Only disable bank
-  // conflict reduction for targets that will use direct load DMA.
-  bool supportsGlobalLoadDMA = false;
-  StringRef targetArch = target.getArch();
-  if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
-      succeeded(maybeChipset)) {
-    constexpr amdgpu::Chipset kGfx950{9, 5, 0};
-    supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
-  }
-
   SmallVector<NamedAttribute, 1> pipelineAttrs;
   auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
       linalgOp->getContext(), /*prefetchNumStages=*/2,
-      /*no_reduce_shared_memory_bank_conflicts=*/supportsGlobalLoadDMA,
+      /*no_reduce_shared_memory_bank_conflicts=*/
+      targetSupportsGlobalLoadDMA(target),
       /*use_igemm_convolution=*/false,
       /*reorder_workgroups_strategy=*/std::nullopt);
   pipelineAttrs.emplace_back(
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.h
@@ -58,6 +58,10 @@ LogicalResult setSortConfig(IREE::GPU::TargetAttr target,
                             mlir::FunctionOpInterface entryPoint,
                             Operation *op);
 
+/// Returns true if `target` supports global load DMA, i.e. its arch parses as
+/// an AMDGPU chipset >= gfx950 (CDNA4+); returns false for any other arch.
+bool targetSupportsGlobalLoadDMA(IREE::GPU::TargetAttr target);
+
 /// Helper for setting up a memory bound reduction configuration, focusing
 /// on getting peak global memory bandwidth.
 LogicalResult setReductionConfig(IREE::GPU::TargetAttr target,
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -35,7 +35,6 @@
 #include "llvm/Support/InterleavedRange.h"
 #include "llvm/Support/LogicalResult.h"
 #include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/IR/Attributes.h"
@@ -1507,20 +1506,11 @@ static LogicalResult setContractConfig(IREE::GPU::TargetAttr target,
       auto configDict = b.getDictionaryAttr(attrs);
       auto loweringConfig =
           IREE::GPU::LoweringConfigAttr::get(context, configDict);
-      // Check if target supports global load DMA (gfx950+). Only disable bank
-      // conflict reduction for targets that will use direct load DMA.
-      bool supportsGlobalLoadDMA = false;
-      StringRef targetArch = target.getArch();
-      if (auto maybeChipset = amdgpu::Chipset::parse(targetArch);
-          succeeded(maybeChipset)) {
-        constexpr amdgpu::Chipset kGfx950{9, 5, 0};
-        supportsGlobalLoadDMA = (*maybeChipset >= kGfx950);
-      }
-
       SmallVector<NamedAttribute, 1> pipelineAttrs;
       auto pipelineOptions = IREE::GPU::GPUPipelineOptionsAttr::get(
           context, /*prefetch_num_stages=*/0,
-          /*no_reduce_shared_memory_bank_conflicts=*/supportsGlobalLoadDMA,
+          /*no_reduce_shared_memory_bank_conflicts=*/
+          IREE::GPU::targetSupportsGlobalLoadDMA(target),
           /*use_igemm_convolution=*/false,
           /*reorder_workgroups_strategy=*/std::nullopt);
       pipelineAttrs.emplace_back(