Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/llvm-hash.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3c709802d31b5bc5ed3af8284b40593ff39b9eec
e33e623cdf188faf56da62677910c707a7e94bf7
9 changes: 3 additions & 6 deletions test/Conversion/amd/async_ops_to_llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -259,16 +259,13 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 16 : i32, ttg.sha
// Each thread needs to load 1 element and we load 1 (sizePerThread) per global.load.lds

// CHECK: llvm.getelementptr
// CHECK: %[[aux_ca:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_ca]]
// CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, 4, 0, 0
%2 = ttg.async_copy_global_to_local %1, %arg2 cacheModifier = ca: tensor<32x32x!tt.ptr<f32>, #blocked> -> <32x32xf32, #shared, #smem, mutable>
// CHECK: llvm.getelementptr
// CHECK: %[[aux_cg:.*]] = llvm.mlir.constant(3 : i32) : i32
// CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_cg]]
// CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, 4, 0, 3
%3 = ttg.async_copy_global_to_local %1, %arg2 cacheModifier = cg: tensor<32x32x!tt.ptr<f32>, #blocked> -> <32x32xf32, #shared, #smem, mutable>
// CHECK: llvm.getelementptr
// CHECK: %[[aux_cv:.*]] = llvm.mlir.constant(17 : i32) : i32
// CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, {{.*}}, {{.*}}, %[[aux_cv]]
// CHECK: rocdl.global.load.lds {{.*}}, {{.*}}, 4, 0, 17
%4 = ttg.async_copy_global_to_local %1, %arg2 cacheModifier = cv: tensor<32x32x!tt.ptr<f32>, #blocked> -> <32x32xf32, #shared, #smem, mutable>
tt.return
}
Expand Down
13 changes: 6 additions & 7 deletions third_party/amd/lib/TritonAMDGPUToLLVM/LoadStoreOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,10 +639,9 @@ struct AsyncCopyGlobalToLocalOpConversion
(vecTy.getNumElements() * vecTy.getElementTypeBitWidth()) / 8;
assert(llvm::isPowerOf2_32(vecBytes));
Value vecBytesVal = b.i32_val(vecBytes);

Value cacheModifiers =
b.i32_val(mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget(
op.getCache(), /*isLoad=*/true, targetInfo));
int32_t cacheModifiers =
mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget(
op.getCache(), /*isLoad=*/true, targetInfo);

Value llMask = adaptor.getMask();
SmallVector<Value> maskElems;
Expand Down Expand Up @@ -680,7 +679,7 @@ struct AsyncCopyGlobalToLocalOpConversion
auto globalLoadLdsOp = rewriter.create<ROCDL::GlobalLoadLDSOp>(
loc,
/*globalPtr=*/srcPtr, /*ldsPtr=*/coalescedShmemAddr[i],
/*size=*/vecBytesVal, /*offset=*/b.i32_val(0),
/*size=*/vecBytes, /*offset=*/0,
/*aux=*/cacheModifiers, /*alias_scopes=*/nullptr,
/*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
LLVM::AMD::addAsyncCopyAliasScope(globalLoadLdsOp);
Expand All @@ -695,8 +694,8 @@ struct AsyncCopyGlobalToLocalOpConversion
rewriter.create<LLVM::CondBrOp>(loc, pred, loadBlock, afterLoad);
rewriter.setInsertionPointToStart(loadBlock);
auto globalLoadLdsOp = rewriter.create<ROCDL::GlobalLoadLDSOp>(
loc, srcPtr, coalescedShmemAddr[i], vecBytesVal,
/*offset=*/b.i32_val(0), cacheModifiers, nullptr, nullptr, nullptr);
loc, srcPtr, coalescedShmemAddr[i], vecBytes,
/*offset=*/0, cacheModifiers, nullptr, nullptr, nullptr);
LLVM::AMD::addAsyncCopyAliasScope(globalLoadLdsOp);

rewriter.create<LLVM::BrOp>(loc, afterLoad);
Expand Down
Loading