Skip to content

Commit d0f07a5

Browse files
amd-eochoalo authored and Honey Goyal committed
[mlir][amdgpu] Add lowering for make_dma_descriptor (llvm#169955)
* Adds initial lowering for make_dma_descriptor supporting tensors of rank 2.
* Adds folders for make_dma_descriptor allowing statically known operands to be folded into attributes.
* Add AllElementTypesMatch<["lds", "global"]> to make_dma_base.
* Rename pad to pad_amount
* Rename pad_every to pad_interval
1 parent 1c8e68f commit d0f07a5

File tree

7 files changed

+640
-56
lines changed

7 files changed

+640
-56
lines changed

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,7 +1227,7 @@ def AMDGPU_ScaledMFMAOp :
12271227
}
12281228

12291229
def AMDGPU_MakeDmaBaseOp :
1230-
AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
1230+
AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments, AllElementTypesMatch<["global", "lds"]>]>,
12311231
Arguments<(ins Arg<AnyMemRef>:$global,
12321232
Variadic<Index>:$global_indices,
12331233
Arg<AnyMemRef>:$lds,
@@ -1293,8 +1293,8 @@ def AMDGPU_MakeDmaDescriptorOp :
12931293
DenseI64ArrayAttr: $global_static_strides,
12941294
Variadic<Index>: $shared_dynamic_sizes,
12951295
DenseI64ArrayAttr: $shared_static_sizes,
1296-
Optional<Index>: $pad,
1297-
Optional<Index>: $pad_every,
1296+
Optional<Index>: $pad_amount,
1297+
Optional<Index>: $pad_interval,
12981298
Optional<AnyMemRef>: $atomic_barrier_address,
12991299
Variadic<Index>: $atomic_barrier_indices,
13001300
Optional<Index>: $global_increment,
@@ -1316,6 +1316,10 @@ def AMDGPU_MakeDmaDescriptorOp :
13161316
Padding can be applied to the LDS address when copying from memory to LDS,
13171317
but not when copying from LDS to memory.
13181318
The values in the padded target addresses remain the same as before the operation was applied.
1319+
$pad_interval must be a power of two contained in [2, 256].
1320+
$pad_amount must be a value contained in [1, 128].
1321+
1322+
$atomic_barrier_address must be aligned to 8 bytes.
13191323

13201324
2D and 3D tensors may be iterated over by setting $global_increment, $lds_increment, and $iteration_count.
13211325
$global_increment determines how much to increment the starting global memory address per iteration in units of the $base's element type.
@@ -1330,7 +1334,7 @@ def AMDGPU_MakeDmaDescriptorOp :
13301334

13311335
// Example of moving a two dimension tensor to LDS where padding is applied after every integer.
13321336
%base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
1333-
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad pad_every %pad_every) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
1337+
%descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padShared(%pad_amount every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
13341338
amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
13351339
```
13361340
}];
@@ -1340,14 +1344,37 @@ def AMDGPU_MakeDmaDescriptorOp :
13401344
`globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
13411345
`globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
13421346
`sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
1343-
( `padShared` `(` $pad^ `every` $pad_every `)` )?
1347+
( `padShared` `(` $pad_amount^ `every` $pad_interval `)` )?
13441348
( `atomicBarrier` `(` $atomic_barrier_address^ `[` $atomic_barrier_indices `]`
13451349
`:` type($atomic_barrier_address) `)`)?
13461350
( `iterate` $global_increment^ `,` $lds_increment `,` $iteration_count )?
13471351
attr-dict `:` qualified(type($base)) `->` type(results)
13481352
}];
13491353

1354+
let extraClassDeclaration = [{
1355+
int64_t getRank() {
1356+
return getGlobalStaticSizes().size();
1357+
}
1358+
1359+
unsigned getElementTypeWidth() {
1360+
return getBase().getType().getElementType().getIntOrFloatBitWidth();
1361+
}
1362+
1363+
SmallVector<OpFoldResult> getMixedGlobalSizes() {
1364+
return getMixedValues(getGlobalStaticSizes(), getGlobalDynamicSizes(), getContext());
1365+
}
1366+
1367+
SmallVector<OpFoldResult> getMixedGlobalStrides() {
1368+
return getMixedValues(getGlobalStaticStrides(), getGlobalDynamicStrides(), getContext());
1369+
}
1370+
1371+
SmallVector<OpFoldResult> getMixedSharedSizes() {
1372+
return getMixedValues(getSharedStaticSizes(), getSharedDynamicSizes(), getContext());
1373+
}
1374+
}];
1375+
13501376
let hasVerifier = 1;
1377+
let hasFolder = 1;
13511378
}
13521379

13531380
#endif // AMDGPU

0 commit comments

Comments
 (0)