Skip to content

Commit 4616092

Browse files
authored
[DIALECT] s/TMAStoreWait/TMAStoreWaitOp/g (NFC) (#5687)
All other ops are named with the *Op suffix. Fix the name of this op to align with the rest.
1 parent 8a16d88 commit 4616092

File tree

4 files changed

+8
-8
lines changed

4 files changed

+8
-8
lines changed

include/triton/Dialect/TritonNvidiaGPU/IR/TritonNvidiaGPUOps.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def TTNG_AsyncTMACopyLocalToGlobalOp : TTNG_Op<"async_tma_copy_local_to_global",
258258
}];
259259
}
260260

261-
def TTNG_TMAStoreWait : TTNG_Op<"async_tma_store_wait"> {
261+
def TTNG_TMAStoreWaitOp : TTNG_Op<"async_tma_store_wait"> {
262262
let summary = "wait until all the inputs are read.";
263263
let arguments = (ins I32Attr:$pendings);
264264
let description = [{

lib/Dialect/TritonGPU/Transforms/Pipeliner/TMAStoresPipeline.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static void createTMAAsyncCopy(scf::ForOp &forOp,
6060

6161
// Put the wait before the local_store to make the store truly async. We know
6262
// that we are the only user of the CopyLocalToGlobal.
63-
builder.create<ttng::TMAStoreWait>(loc, 0);
63+
builder.create<ttng::TMAStoreWaitOp>(loc, 0);
6464
builder.create<ttg::LocalStoreOp>(loc, storeOp.getSrc(), alloc);
6565
builder.create<ttng::FenceAsyncSharedOp>(loc, false);
6666
Value tmaPtr = builder.create<triton::nvidia_gpu::TensorDescToTMAPtrOp>(
@@ -102,7 +102,7 @@ bool mlir::triton::pipelineTMAStores(scf::ForOp forOp) {
102102
// Deallocate shared memory buffers.
103103
OpBuilder builder(forOp);
104104
builder.setInsertionPointAfter(forOp);
105-
builder.create<ttng::TMAStoreWait>(forOp->getLoc(), 0);
105+
builder.create<ttng::TMAStoreWaitOp>(forOp->getLoc(), 0);
106106
for (auto it : storeToAlloc) {
107107
builder.create<ttg::LocalDeallocOp>(forOp->getLoc(), it.second);
108108
}

lib/Dialect/TritonNvidiaGPU/Transforms/TMALowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class TMAStoreLowering
101101
loc, op.getDesc());
102102
rewriter.create<triton::nvidia_gpu::AsyncTMACopyLocalToGlobalOp>(
103103
loc, tmaPtr, op.getIndices(), alloc);
104-
rewriter.create<triton::nvidia_gpu::TMAStoreWait>(loc, 0);
104+
rewriter.create<triton::nvidia_gpu::TMAStoreWaitOp>(loc, 0);
105105
rewriter.eraseOp(op);
106106
return success();
107107
}

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,12 +1310,12 @@ struct AsyncCommitGroupOpConversion
13101310
}
13111311
};
13121312

1313-
struct TMAStoreWaitConversion
1314-
: public ConvertOpToLLVMPattern<triton::nvidia_gpu::TMAStoreWait> {
1313+
struct TMAStoreWaitOpConversion
1314+
: public ConvertOpToLLVMPattern<triton::nvidia_gpu::TMAStoreWaitOp> {
13151315
using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
13161316

13171317
LogicalResult
1318-
matchAndRewrite(triton::nvidia_gpu::TMAStoreWait op, OpAdaptor adaptor,
1318+
matchAndRewrite(triton::nvidia_gpu::TMAStoreWaitOp op, OpAdaptor adaptor,
13191319
ConversionPatternRewriter &rewriter) const override {
13201320
PTXBuilder ptxBuilder;
13211321
auto &asyncWaitOp = *ptxBuilder.create<>("cp.async.bulk.wait_group.read");
@@ -1343,6 +1343,6 @@ void mlir::triton::NVIDIA::populateLoadStoreOpToLLVMPatterns(
13431343
patterns.add<AsyncCommitGroupOpConversion>(typeConverter, benefit);
13441344
patterns.add<AsyncWaitOpConversion>(typeConverter, benefit);
13451345
patterns.add<AsyncTMACopyGlobalToLocalOpConversion,
1346-
AsyncTMACopyLocalToGlobalOpConversion, TMAStoreWaitConversion>(
1346+
AsyncTMACopyLocalToGlobalOpConversion, TMAStoreWaitOpConversion>(
13471347
typeConverter, benefit);
13481348
}

0 commit comments

Comments
 (0)