@@ -228,11 +228,11 @@ def BufferAtomicRMWOp : TT_AMDGPU_Op<"buffer_atomic_rmw", [
228228 TypesMatchWith<"result element type matches the pointed type of ptr", "result", "ptr", "getPointerTypeToElement($_self)">,
229229 TypesMatchWith<"result and offsets have the same shape", "result", "offsets", "getI32SameShape($_self)">,
230230 TypesMatchWith<"result and mask have the same shape", "result", "mask", "getI1SameShape($_self)",
231- "($_op.getOperands().size() <= 3 ) || std::equal_to<>()">,
231+ "($_op.getOperands().size() <= 4 ) || std::equal_to<>()">,
232232 TypesMatchWith<"value element type matches the pointed type of ptr", "value", "ptr", "getPointerTypeToElement($_self)">,
233233 TypesMatchWith<"value and offsets have the same shape", "value", "offsets", "getI32SameShape($_self)">,
234234 TypesMatchWith<"value and mask have the same shape", "value", "mask", "getI1SameShape($_self)",
235- "($_op.getOperands().size() <= 3 ) || std::equal_to<>()">,
235+ "($_op.getOperands().size() <= 4 ) || std::equal_to<>()">,
236236]>{
237237 let summary = "Atomic RMW op which reads, modifies, and writes to a scalar base pointer and a tensor offset";
238238 let description = [{
@@ -242,13 +242,17 @@ def BufferAtomicRMWOp : TT_AMDGPU_Op<"buffer_atomic_rmw", [
242242 the atomic RMW op. Elements with `mask[i] == 0` are dropped (i.e., the atomic is not executed).
243243 Similar to TT_AtomicRMWOp: Buffer atomic RMW ops load data at $ptr, do $rmw_op with $val, and store result to $ptr with
244244 the specified memory semantics and scope. Atomic RMW ops return the pre-op value if used, otherwise the value is implicitly dropped.
245+ Stride is the distance between the beginning of contiguous memory chunks. When performing an RMW, the `stride` is
246+ the address difference, in bytes, between the first elements of consecutive rows. The compiler tries to obtain the
247+ `stride` when it converts to buffer ops because it is important for optimizing cache memory access.
245248 }];
246249 let arguments = (
247250 ins
248251 TT_AtomicRMWAttr:$atomic_rmw_op,
249252 TT_Ptr:$ptr,
250253 I32Tensor:$offsets,
251254 TT_Tensor:$value,
255+ I32:$stride,
252256 TT_MemSemanticAttr:$sem,
253257 TT_MemSyncScopeAttr:$scope,
254258 Optional<TT_BoolTensor>:$mask
@@ -257,6 +261,7 @@ def BufferAtomicRMWOp : TT_AMDGPU_Op<"buffer_atomic_rmw", [
257261
258262 let assemblyFormat = [{
259263 $atomic_rmw_op `,` $sem `,` $scope `,` $value `,` $ptr `[` $offsets `]` (`,` $mask^)?
264+ `stride` `=` $stride
260265 attr-dict `:` type($result)
261266 }];
262267}
0 commit comments