Skip to content

Commit 9706ccc

Browse files
committed
Move to a workgroup fence from a workgroup-one-as fence, because a workgroup-one-as fence combined with the MMRA has no effect (the expected waitcnt operations are not emitted)
1 parent b414fff commit 9706ccc

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,15 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
543543

544544
Attribute mmra =
545545
rewriter.getAttr<LLVM::MMRATagAttr>("amdgpu-synchronize-as", "local");
546-
StringRef scope = "workgroup-one-as";
546+
// Note: while there *is* a workgroup-one-as scope, this, when combined with
547+
// the MMRA, will lead to the fence having no effect. This is because
548+
// the codepaths for an atomic load or store will observe that a
549+
// one-address-space atomic to LDS requires no synchronization because
550+
// operations on LDS are totally ordered with respect to each other,
551+
// and so will not emit the correct waitcnt operations that these fences
552+
// are intended to produce. Therefore, we use a broader type of fence
553+
// and rely on the MMRA to relax it to the semantics we want.
554+
StringRef scope = "workgroup";
547555

548556
auto relFence = LLVM::FenceOp::create(rewriter, loc,
549557
LLVM::AtomicOrdering::release, scope);

mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ func.func @amdgpu_raw_buffer_atomic_cmpswap_v2f16(%src : vector<2xf16>, %cmp : v
416416

417417
// CHECK-LABEL: func @lds_barrier
418418
func.func @lds_barrier() {
419-
// CHECK: llvm.fence syncscope("workgroup-one-as") release {llvm.mmra = #[[$MMRA_TAG]]}
419+
// CHECK: llvm.fence syncscope("workgroup") release {llvm.mmra = #[[$MMRA_TAG]]}
420420
// GFX908: llvm.inline_asm has_side_effects asm_dialect = att
421421
// GFX908-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_barrier"
422422
// GFX90A-NEXT: rocdl.s.barrier
@@ -425,7 +425,7 @@ func.func @lds_barrier() {
425425
// GFX11-NEXT: rocdl.s.barrier
426426
// GFX12-NEXT: rocdl.s.barrier.signal -1
427427
// GFX12-NEXT: rocdl.s.barrier.wait -1
428-
// CHECK-NEXT: llvm.fence syncscope("workgroup-one-as") acquire {llvm.mmra = #[[$MMRA_TAG]]}
428+
// CHECK-NEXT: llvm.fence syncscope("workgroup") acquire {llvm.mmra = #[[$MMRA_TAG]]}
429429
amdgpu.lds_barrier
430430
func.return
431431
}

0 commit comments

Comments
 (0)