@@ -1227,7 +1227,7 @@ def AMDGPU_ScaledMFMAOp :
12271227}
12281228
12291229def AMDGPU_MakeDmaBaseOp :
1230- AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
1230+ AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments, AllElementTypesMatch<["global", "lds"]> ]>,
12311231 Arguments<(ins Arg<AnyMemRef>:$global,
12321232 Variadic<Index>:$global_indices,
12331233 Arg<AnyMemRef>:$lds,
@@ -1293,8 +1293,8 @@ def AMDGPU_MakeDmaDescriptorOp :
12931293 DenseI64ArrayAttr: $global_static_strides,
12941294 Variadic<Index>: $shared_dynamic_sizes,
12951295 DenseI64ArrayAttr: $shared_static_sizes,
1296- Optional<Index>: $pad ,
1297- Optional<Index>: $pad_every ,
1296+ Optional<Index>: $pad_amount ,
1297+ Optional<Index>: $pad_interval ,
12981298 Optional<AnyMemRef>: $atomic_barrier_address,
12991299 Variadic<Index>: $atomic_barrier_indices,
13001300 Optional<Index>: $global_increment,
@@ -1316,6 +1316,10 @@ def AMDGPU_MakeDmaDescriptorOp :
13161316 Padding can be applied to the LDS address when copying from memory to LDS,
13171317 but not when copying from LDS to memory.
13181318 The values in the padded target addresses remain the same as before the operation was applied.
1319+ $pad_interval must be a power of two contained in [2, 256].
1320+ $pad_amount must be a value contained in [1, 128].
1321+
1322+ $atomic_barrier_address must be aligned to 8 bytes.
13191323
13201324 2D and 3D tensors may be iterated over by setting $global_increment, $lds_increment, and $iteration_count.
13211325 $global_increment determines how much to increment the starting global memory address per iteration in units of the $base's element type.
@@ -1330,7 +1334,7 @@ def AMDGPU_MakeDmaDescriptorOp :
13301334
13311335 // Example of moving a two-dimensional tensor to LDS where padding is applied after every integer.
13321336 %base = amdgpu.make_dma_base %global[0, 0], %lds[0, 0] : memref<32x32xi32>, memref<64x64xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
1333- %descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padding(%pad pad_every %pad_every ) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
1337+ %descriptor = amdgpu.make_dma_descriptor %base globalSize [32, 32] globalStride [32, 1] sharedSize [64, 64] padShared(%pad_amount every %pad_interval) : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
13341338 amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
13351339 ```
13361340 }];
@@ -1340,14 +1344,37 @@ def AMDGPU_MakeDmaDescriptorOp :
13401344 `globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
13411345 `globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
13421346 `sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
1343- ( `padShared` `(` $pad ^ `every` $pad_every `)` )?
1347+ ( `padShared` `(` $pad_amount ^ `every` $pad_interval `)` )?
13441348 ( `atomicBarrier` `(` $atomic_barrier_address^ `[` $atomic_barrier_indices `]`
13451349 `:` type($atomic_barrier_address) `)`)?
13461350 ( `iterate` $global_increment^ `,` $lds_increment `,` $iteration_count )?
13471351 attr-dict `:` qualified(type($base)) `->` type(results)
13481352 }];
13491353
1354+ let extraClassDeclaration = [{
1355+ int64_t getRank() {
1356+ return getGlobalStaticSizes().size();
1357+ }
1358+
1359+ unsigned getElementTypeWidth() {
1360+ return getBase().getType().getElementType().getIntOrFloatBitWidth();
1361+ }
1362+
1363+ SmallVector<OpFoldResult> getMixedGlobalSizes() {
1364+ return getMixedValues(getGlobalStaticSizes(), getGlobalDynamicSizes(), getContext());
1365+ }
1366+
1367+ SmallVector<OpFoldResult> getMixedGlobalStrides() {
1368+ return getMixedValues(getGlobalStaticStrides(), getGlobalDynamicStrides(), getContext());
1369+ }
1370+
1371+ SmallVector<OpFoldResult> getMixedSharedSizes() {
1372+ return getMixedValues(getSharedStaticSizes(), getSharedDynamicSizes(), getContext());
1373+ }
1374+ }];
1375+
13501376 let hasVerifier = 1;
1377+ let hasFolder = 1;
13511378}
13521379
13531380#endif // AMDGPU
0 commit comments