-
Notifications
You must be signed in to change notification settings - Fork 15.2k
GPU known subgroup size #112732
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GPU known subgroup size #112732
Conversation
|
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: Finlay (FMarno). Changes — Full diff: https://github.com/llvm/llvm-project/pull/112732.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
index 860f8933672038..fb9df5067a31b0 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUBase.td
@@ -64,7 +64,8 @@ def GPU_Dialect : Dialect {
let discardableAttrs = (ins
"::mlir::DenseI32ArrayAttr":$known_block_size,
- "::mlir::DenseI32ArrayAttr":$known_grid_size
+ "::mlir::DenseI32ArrayAttr":$known_grid_size,
+ "::mlir::IntegerAttr" : $known_subgroup_size
);
let dependentDialects = ["arith::ArithDialect"];
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6098eb34d04d52..d4779d1b47a42d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -388,6 +388,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
by using `gpu.known_block_size` or `gpu.known_grid_size`, but this carries
the risk that they will be discarded.
+ A function may optionally be annotated with the subgroup size that will be
+ used when it is launched using the `known_subgroup_size` attribute. If set,
+ this attribute is a single positive integer (i.e. > 0). Launching a function
+ with this annotation using a subgroup size other than the one specified is
+ undefined behaviour.
+
Syntax:
```
@@ -431,7 +437,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
GPU_OptionalDimSizeHintAttr:$known_block_size,
- GPU_OptionalDimSizeHintAttr:$known_grid_size);
+ GPU_OptionalDimSizeHintAttr:$known_grid_size,
+ OptionalAttr<I32Attr>:$known_subgroup_size);
let regions = (region AnyRegion:$body);
let skipDefaultBuilders = 1;
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 544f1f4a4f6a79..2a5e5de2357641 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -271,24 +271,55 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
typeMangling.value());
}
+ static std::optional<uint32_t>
+ getIntelReqdSubGroupSize(FunctionOpInterface func) {
+ constexpr llvm::StringLiteral discardableIntelReqdSubgroupSize =
+ "llvm.intel_reqd_sub_group_size";
+ IntegerAttr reqdSubgroupSizeAttr = llvm::cast_if_present<IntegerAttr>(
+ func->getAttr(discardableIntelReqdSubgroupSize));
+ if (!reqdSubgroupSizeAttr)
+ return {};
+
+ return reqdSubgroupSizeAttr.getInt();
+ }
+
/// Get the subgroup size from the target or return a default.
- static int getSubgroupSize(Operation *op) {
- return spirv::lookupTargetEnvOrDefault(op)
- .getResourceLimits()
- .getSubgroupSize();
+ static std::optional<uint32_t>
+ getKnownSubgroupSize(FunctionOpInterface func) {
+ IntegerAttr knownSubgroupSizeAttr =
+ mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(func->getContext())
+ .getAttr(func);
+ if (!knownSubgroupSizeAttr)
+ return {};
+
+ return knownSubgroupSizeAttr.getInt();
}
- static bool hasValidWidth(gpu::ShuffleOp op) {
+ static std::optional<uint32_t> getSubgroupSize(Operation *op) {
+ FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
+ if (!func)
+ return {};
+ auto knownSubgroupSize = getKnownSubgroupSize(func);
+ if (knownSubgroupSize)
+ return knownSubgroupSize;
+ return getIntelReqdSubGroupSize(func);
+ }
+
+ static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
llvm::APInt val;
Value width = op.getWidth();
- return matchPattern(width, m_ConstantInt(&val)) &&
- val == getSubgroupSize(op);
+ return matchPattern(width, m_ConstantInt(&val)) && val == subgroupSize;
}
LogicalResult
matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const final {
- if (!hasValidWidth(op))
+ auto maybeSubgroupSize = getSubgroupSize(op);
+ if (!maybeSubgroupSize)
+ return rewriter.notifyMatchFailure(
+ op, "subgroup size not specified. Should be specified with "
+ "known_subgroup_size.");
+ if (!hasValidWidth(op, maybeSubgroupSize.value()))
return rewriter.notifyMatchFailure(
op, "shuffle width and subgroup size mismatch");
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 910105ddf69586..467d15e5c2ef2b 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -260,7 +260,7 @@ gpu.module @shuffles {
func.func @gpu_shuffles(%val0: i32, %id: i32,
%val1: i64, %mask: i32,
%val2: f32, %delta_up: i32,
- %val3: f64, %delta_down: i32) {
+ %val3: f64, %delta_down: i32) attributes { llvm.intel_reqd_sub_group_size = 32 : i32 } {
%width = arith.constant 32 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
// CHECK-SAME-DAG: no_unwind
@@ -302,9 +302,7 @@ gpu.module @shuffles {
// Check `gpu.shuffle` conversion with explicit subgroup size.
-gpu.module @shuffles attributes {
- spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
-} {
+gpu.module @shuffles {
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
// CHECK-SAME-DAG: no_unwind
// CHECK-SAME-DAG: convergent
@@ -352,7 +350,7 @@ gpu.module @shuffles attributes {
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
- // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
+ // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
func.func @gpu_shuffles(%i8_val: i8,
%i16_val: i16,
%i32_val: i32,
@@ -360,7 +358,7 @@ gpu.module @shuffles attributes {
%f16_val: f16,
%f32_val: f32,
%f64_val: f64,
- %offset: i32) {
+ %offset: i32) attributes {gpu.known_subgroup_size = 16 : i32} {
%width = arith.constant 16 : i32
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
// CHECK: llvm.mlir.constant(true) : i1
|
|
|
||
| static std::optional<uint32_t> | ||
| getIntelReqdSubGroupSize(FunctionOpInterface func) { | ||
| constexpr llvm::StringLiteral discardableIntelReqdSubgroupSize = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good if we could get this from a function like an IntelReqdSubgroupSizeAttrName function
| // CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64, | ||
| // CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32, | ||
| // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) { | ||
| // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) | |
| // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) attributes {gpu.known_subgroup_size = 16 : i32} { |
include the attribute in the check
|
Sorry, I didn't mean to open this PR on this repo |
No description provided.