Skip to content

Conversation

@FMarno
Copy link
Contributor

@FMarno FMarno commented Oct 17, 2024

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Oct 17, 2024

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Finlay (FMarno)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/112732.diff

4 Files Affected:

  • (modified) mlir/include/mlir/Dialect/GPU/IR/GPUBase.td (+2-1)
  • (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+8-1)
  • (modified) mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp (+39-8)
  • (modified) mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir (+4-6)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 860f8933672038..fb9df5067a31b0 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -64,7 +64,8 @@ def GPU_Dialect : Dialect {
 
   let discardableAttrs = (ins
     "::mlir::DenseI32ArrayAttr":$known_block_size,
-    "::mlir::DenseI32ArrayAttr":$known_grid_size
+    "::mlir::DenseI32ArrayAttr":$known_grid_size,
+    "::mlir::IntegerAttr" : $known_subgroup_size
   );
 
   let dependentDialects = ["arith::ArithDialect"];
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6098eb34d04d52..d4779d1b47a42d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -388,6 +388,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
     by using `gpu.known_block_size` or `gpu.known_grid_size`, but this carries
     the risk that they will be discarded.
 
+    A function may optionally be annotated with the subgroup size that will be
+    used when it is launched, via the `known_subgroup_size` attribute. If set,
+    this attribute is a single positive integer (i.e. > 0). Launching a function
+    carrying this annotation with a subgroup size other than the one specified
+    is undefined behaviour.
+
     Syntax:
 
     ```
@@ -431,7 +437,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
                        OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
                        OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
                        GPU_OptionalDimSizeHintAttr:$known_block_size,
-                       GPU_OptionalDimSizeHintAttr:$known_grid_size);
+                       GPU_OptionalDimSizeHintAttr:$known_grid_size,
+                       OptionalAttr<I32Attr>:$known_subgroup_size);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 544f1f4a4f6a79..2a5e5de2357641 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -271,24 +271,55 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
                          typeMangling.value());
   }
 
+  static std::optional<uint32_t>
+  getIntelReqdSubGroupSize(FunctionOpInterface func) {
+    constexpr llvm::StringLiteral discardableIntelReqdSubgroupSize =
+        "llvm.intel_reqd_sub_group_size";
+    IntegerAttr reqdSubgroupSizeAttr = llvm::cast_if_present<IntegerAttr>(
+        func->getAttr(discardableIntelReqdSubgroupSize));
+    if (!reqdSubgroupSizeAttr)
+      return {};
+
+    return reqdSubgroupSizeAttr.getInt();
+  }
+
   /// Get the subgroup size from the target or return a default.
-  static int getSubgroupSize(Operation *op) {
-    return spirv::lookupTargetEnvOrDefault(op)
-        .getResourceLimits()
-        .getSubgroupSize();
+  static std::optional<uint32_t>
+  getKnownSubgroupSize(FunctionOpInterface func) {
+    IntegerAttr knownSubgroupSizeAttr =
+        mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(func->getContext())
+            .getAttr(func);
+    if (!knownSubgroupSizeAttr)
+      return {};
+
+    return knownSubgroupSizeAttr.getInt();
   }
 
-  static bool hasValidWidth(gpu::ShuffleOp op) {
+  static std::optional<uint32_t> getSubgroupSize(Operation *op) {
+    FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
+    if (!func)
+      return {};
+    auto knownSubgroupSize = getKnownSubgroupSize(func);
+    if (knownSubgroupSize)
+      return knownSubgroupSize;
+    return getIntelReqdSubGroupSize(func);
+  }
+
+  static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
     llvm::APInt val;
     Value width = op.getWidth();
-    return matchPattern(width, m_ConstantInt(&val)) &&
-           val == getSubgroupSize(op);
+    return matchPattern(width, m_ConstantInt(&val)) && val == subgroupSize;
   }
 
   LogicalResult
   matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const final {
-    if (!hasValidWidth(op))
+    auto maybeSubgroupSize = getSubgroupSize(op);
+    if (!maybeSubgroupSize)
+      return rewriter.notifyMatchFailure(
+          op, "subgroup size not specified. Should be specified with "
+              "known_subgroup_size.");
+    if (!hasValidWidth(op, maybeSubgroupSize.value()))
       return rewriter.notifyMatchFailure(
           op, "shuffle width and subgroup size mismatch");
 
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 910105ddf69586..467d15e5c2ef2b 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -260,7 +260,7 @@ gpu.module @shuffles {
   func.func @gpu_shuffles(%val0: i32, %id: i32,
                           %val1: i64, %mask: i32,
                           %val2: f32, %delta_up: i32,
-                          %val3: f64, %delta_down: i32) {
+                          %val3: f64, %delta_down: i32) attributes { llvm.intel_reqd_sub_group_size = 32 : i32 } {
     %width = arith.constant 32 : i32
     // CHECK:         llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
     // CHECK-SAME-DAG:  no_unwind
@@ -302,9 +302,7 @@ gpu.module @shuffles {
 
 // Check `gpu.shuffle` conversion with explicit subgroup size.
 
-gpu.module @shuffles attributes {
-  spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
-} {
+gpu.module @shuffles {
   // CHECK:           llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
   // CHECK-SAME-DAG:  no_unwind
   // CHECK-SAME-DAG:  convergent
@@ -352,7 +350,7 @@ gpu.module @shuffles attributes {
   // CHECK-SAME:              (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
   // CHECK-SAME:               %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
   // CHECK-SAME:               %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
-  // CHECK-SAME:               %[[F64_VAL:.*]]: f64,  %[[OFFSET:.*]]: i32) {
+  // CHECK-SAME:               %[[F64_VAL:.*]]: f64,  %[[OFFSET:.*]]: i32)
   func.func @gpu_shuffles(%i8_val: i8,
                           %i16_val: i16,
                           %i32_val: i32,
@@ -360,7 +358,7 @@ gpu.module @shuffles attributes {
                           %f16_val: f16,
                           %f32_val: f32,
                           %f64_val: f64,
-                          %offset: i32) {
+                          %offset: i32) attributes {gpu.known_subgroup_size = 16 : i32} {
     %width = arith.constant 16 : i32
     // CHECK:         llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
     // CHECK:         llvm.mlir.constant(true) : i1


static std::optional<uint32_t>
getIntelReqdSubGroupSize(FunctionOpInterface func) {
constexpr llvm::StringLiteral discardableIntelReqdSubgroupSize =
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good if we could get this from a function like a IntelReqdSubgroupSizeAttrName function

// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) attributes {gpu.known_subgroup_size = 16 : i32} {

include the attribute in the check

@FMarno FMarno closed this Oct 17, 2024
@FMarno
Copy link
Contributor Author

FMarno commented Oct 17, 2024

Sorry, I didn't mean to open this PR on this repo

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants