Skip to content

Commit aca998e

Browse files
committed
[mlir][gpu] Add known subgroup size
Also use it for lowering in GPUToLLVMSPV
1 parent b9754e9 commit aca998e

File tree

4 files changed

+35
-16
lines changed

4 files changed

+35
-16
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUBase.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ def GPU_Dialect : Dialect {
6464

6565
let discardableAttrs = (ins
6666
"::mlir::DenseI32ArrayAttr":$known_block_size,
67-
"::mlir::DenseI32ArrayAttr":$known_grid_size
67+
"::mlir::DenseI32ArrayAttr":$known_grid_size,
68+
"::mlir::IntegerAttr" : $known_subgroup_size
6869
);
6970

7071
let dependentDialects = ["arith::ArithDialect"];

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
388388
by using `gpu.known_block_size` or `gpu.known_grid_size`, but this carries
389389
the risk that they will be discarded.
390390

391+
A function may optionally be annotated with the subgroup size that will be
392+
used when it is launched using the `known_subgroup_size` attribute. If set,
393+
this attribute is a single positive integer (i.e. > 0). Launching a function
394+
with this annotation using a subgroup size other than the one specified is
395+
undefined behaviour.
396+
391397
Syntax:
392398

393399
```
@@ -431,7 +437,8 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
431437
OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
432438
OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
433439
GPU_OptionalDimSizeHintAttr:$known_block_size,
434-
GPU_OptionalDimSizeHintAttr:$known_grid_size);
440+
GPU_OptionalDimSizeHintAttr:$known_grid_size,
441+
OptionalAttr<I32Attr>:$known_subgroup_size);
435442
let regions = (region AnyRegion:$body);
436443

437444
let skipDefaultBuilders = 1;

mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -272,23 +272,36 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
272272
}
273273

274274
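/// Get the subgroup size from the `known_subgroup_size` attribute of the
/// enclosing function, or an empty optional if there is no enclosing
/// function or the attribute is not set.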
275-
static int getSubgroupSize(Operation *op) {
276-
return spirv::lookupTargetEnvOrDefault(op)
277-
.getResourceLimits()
278-
.getSubgroupSize();
275+
static std::optional<uint32_t> getSubgroupSize(Operation *op) {
276+
// TODO check for intel_reqd_sub_group_size
277+
278+
FunctionOpInterface func = op->getParentOfType<FunctionOpInterface>();
279+
if (!func)
280+
return {};
281+
282+
IntegerAttr knownSubgroupSizeAttr =
283+
mlir::gpu::GPUDialect::KnownSubgroupSizeAttrHelper(op->getContext())
284+
.getAttr(func);
285+
if (!knownSubgroupSizeAttr)
286+
return {};
287+
return knownSubgroupSizeAttr.getInt();
279288
}
280289

281-
static bool hasValidWidth(gpu::ShuffleOp op) {
290+
static bool hasValidWidth(gpu::ShuffleOp op, uint32_t subgroupSize) {
282291
llvm::APInt val;
283292
Value width = op.getWidth();
284-
return matchPattern(width, m_ConstantInt(&val)) &&
285-
val == getSubgroupSize(op);
293+
return matchPattern(width, m_ConstantInt(&val)) && val == subgroupSize;
286294
}
287295

288296
LogicalResult
289297
matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
290298
ConversionPatternRewriter &rewriter) const final {
291-
if (!hasValidWidth(op))
299+
auto maybeSubgroupSize = getSubgroupSize(op);
300+
if (!maybeSubgroupSize)
301+
return rewriter.notifyMatchFailure(
302+
op, "subgroup size not specified. Should be specified with "
303+
"known_subgroup_size.");
304+
if (!hasValidWidth(op, maybeSubgroupSize.value()))
292305
return rewriter.notifyMatchFailure(
293306
op, "shuffle width and subgroup size mismatch");
294307

mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ gpu.module @shuffles {
260260
func.func @gpu_shuffles(%val0: i32, %id: i32,
261261
%val1: i64, %mask: i32,
262262
%val2: f32, %delta_up: i32,
263-
%val3: f64, %delta_down: i32) {
263+
%val3: f64, %delta_down: i32) attributes {gpu.known_subgroup_size = 32 : i32} {
264264
%width = arith.constant 32 : i32
265265
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
266266
// CHECK-SAME-DAG: no_unwind
@@ -302,9 +302,7 @@ gpu.module @shuffles {
302302

303303
// Check `gpu.shuffle` conversion with explicit subgroup size.
304304

305-
gpu.module @shuffles attributes {
306-
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
307-
} {
305+
gpu.module @shuffles {
308306
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
309307
// CHECK-SAME-DAG: no_unwind
310308
// CHECK-SAME-DAG: convergent
@@ -352,15 +350,15 @@ gpu.module @shuffles attributes {
352350
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
353351
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
354352
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
355-
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
353+
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
356354
func.func @gpu_shuffles(%i8_val: i8,
357355
%i16_val: i16,
358356
%i32_val: i32,
359357
%i64_val: i64,
360358
%f16_val: f16,
361359
%f32_val: f32,
362360
%f64_val: f64,
363-
%offset: i32) {
361+
%offset: i32) attributes {gpu.known_subgroup_size = 16 : i32} {
364362
%width = arith.constant 16 : i32
365363
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
366364
// CHECK: llvm.mlir.constant(true) : i1

0 commit comments

Comments
 (0)