Skip to content

Conversation

@ptrojahn
Copy link
Contributor

@ptrojahn ptrojahn commented Aug 8, 2025

Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here:

if (TII->isBarrierStart(MI.getOpcode()) && … (excerpt from llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp, line 2017)

Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we
need to use inline assembly to get rid of the wait introduced here: https://github.com/llvm/llvm-project/blob/bd9117c569678e7af042074cbcaba860ab6eefb3/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp#L2017
@llvmbot
Copy link
Member

llvmbot commented Aug 8, 2025

@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-backend-amdgpu

Author: Paul Trojahn (ptrojahn)

Changes

Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here:

if (TII->isBarrierStart(MI.getOpcode()) && … (excerpt from llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp, line 2017)


Full diff: https://github.com/llvm/llvm-project/pull/152778.diff

2 Files Affected:

  • (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+11-9)
  • (modified) mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir (+2-3)
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 64720bfe6cf50..767221177c816 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
   LogicalResult
   matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11;
+    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11;
 
     if (requiresInlineAsm) {
       auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
                                                       LLVM::AsmDialect::AD_ATT);
-      const char *asmStr =
+      const char *asmStrPreGfx12 =
           ";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
+      const char *asmStr =
+          ";;;WARNING: BREAKS DEBUG WATCHES\n"
+          "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1";
       const char *constraints = "";
       rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
           op,
           /*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
-          /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
+          /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12,
+          constraints, /*has_side_effects=*/true,
           /*is_align_stack=*/false, LLVM::TailCallKind::None,
           /*asm_dialect=*/asmDialectAttr,
           /*operand_attrs=*/ArrayAttr());
@@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
       Location loc = op->getLoc();
       ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits);
       rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+      return success();
     } else {
-      Location loc = op->getLoc();
-      ROCDL::WaitDscntOp::create(rewriter, loc, 0);
-      ROCDL::BarrierSignalOp::create(rewriter, loc, -1);
-      rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+      return op.emitOpError(
+                 "don't know how to lower this for chipset major version")
+             << chipset.majorVersion;
     }
-
-    return success();
   }
 };
 
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index cc1162d8b0de8..d59f7fe3ba4c2 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -424,9 +424,8 @@ func.func @lds_barrier() {
   // GFX10-NEXT: rocdl.s.barrier
   // GFX11:  llvm.inline_asm has_side_effects asm_dialect = att
   // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
-  // GFX12:  rocdl.s.wait.dscnt 0
-  // GFX12-NEXT: rocdl.s.barrier.signal -1
-  // GFX12-NEXT: rocdl.s.barrier.wait -1
+  // GFX12:  llvm.inline_asm has_side_effects asm_dialect = att
+  // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1"
   amdgpu.lds_barrier
   func.return
 }

@llvmbot
Copy link
Member

llvmbot commented Aug 8, 2025

@llvm/pr-subscribers-mlir-gpu

Author: Paul Trojahn (ptrojahn)

Changes

Just like gfx11, gfx12 does not support FeatureBackOffBarrier, so we need to use inline assembly to get rid of the wait introduced here:

if (TII->isBarrierStart(MI.getOpcode()) && … (excerpt from llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp, line 2017)


Full diff: https://github.com/llvm/llvm-project/pull/152778.diff

2 Files Affected:

  • (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+11-9)
  • (modified) mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir (+2-3)
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 64720bfe6cf50..767221177c816 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
   LogicalResult
   matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
-    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11;
+    bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11;
 
     if (requiresInlineAsm) {
       auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(),
                                                       LLVM::AsmDialect::AD_ATT);
-      const char *asmStr =
+      const char *asmStrPreGfx12 =
           ";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier";
+      const char *asmStr =
+          ";;;WARNING: BREAKS DEBUG WATCHES\n"
+          "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1";
       const char *constraints = "";
       rewriter.replaceOpWithNewOp<LLVM::InlineAsmOp>(
           op,
           /*resultTypes=*/TypeRange(), /*operands=*/ValueRange(),
-          /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true,
+          /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12,
+          constraints, /*has_side_effects=*/true,
           /*is_align_stack=*/false, LLVM::TailCallKind::None,
           /*asm_dialect=*/asmDialectAttr,
           /*operand_attrs=*/ArrayAttr());
@@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern<LDSBarrierOp> {
       Location loc = op->getLoc();
       ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits);
       rewriter.replaceOpWithNewOp<ROCDL::SBarrierOp>(op);
+      return success();
     } else {
-      Location loc = op->getLoc();
-      ROCDL::WaitDscntOp::create(rewriter, loc, 0);
-      ROCDL::BarrierSignalOp::create(rewriter, loc, -1);
-      rewriter.replaceOpWithNewOp<ROCDL::BarrierWaitOp>(op, -1);
+      return op.emitOpError(
+                 "don't know how to lower this for chipset major version")
+             << chipset.majorVersion;
     }
-
-    return success();
   }
 };
 
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index cc1162d8b0de8..d59f7fe3ba4c2 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -424,9 +424,8 @@ func.func @lds_barrier() {
   // GFX10-NEXT: rocdl.s.barrier
   // GFX11:  llvm.inline_asm has_side_effects asm_dialect = att
   // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
-  // GFX12:  rocdl.s.wait.dscnt 0
-  // GFX12-NEXT: rocdl.s.barrier.signal -1
-  // GFX12-NEXT: rocdl.s.barrier.wait -1
+  // GFX12:  llvm.inline_asm has_side_effects asm_dialect = att
+  // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1"
   amdgpu.lds_barrier
   func.return
 }

@ptrojahn
Copy link
Contributor Author

ptrojahn commented Sep 5, 2025

With #155370 merged, this should no longer be necessary.

@ptrojahn ptrojahn closed this Sep 5, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants