diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 64720bfe6cf50..767221177c816 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -535,18 +535,22 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern { LogicalResult matchAndRewrite(LDSBarrierOp op, LDSBarrierOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { - bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion == 11; + bool requiresInlineAsm = chipset < kGfx90a || chipset.majorVersion >= 11; if (requiresInlineAsm) { auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(), LLVM::AsmDialect::AD_ATT); - const char *asmStr = + const char *asmStrPreGfx12 = ";;;WARNING: BREAKS DEBUG WATCHES\ns_waitcnt lgkmcnt(0)\ns_barrier"; + const char *asmStr = + ";;;WARNING: BREAKS DEBUG WATCHES\n" + "s_wait_dscnt 0x0\ns_barrier_signal -1\ns_barrier_wait -1"; const char *constraints = ""; rewriter.replaceOpWithNewOp( op, /*resultTypes=*/TypeRange(), /*operands=*/ValueRange(), - /*asm_string=*/asmStr, constraints, /*has_side_effects=*/true, + /*asm_string=*/chipset.majorVersion >= 12 ? asmStr : asmStrPreGfx12, + constraints, /*has_side_effects=*/true, /*is_align_stack=*/false, LLVM::TailCallKind::None, /*asm_dialect=*/asmDialectAttr, /*operand_attrs=*/ArrayAttr()); @@ -574,14 +578,12 @@ struct LDSBarrierOpLowering : public ConvertOpToLLVMPattern { Location loc = op->getLoc(); ROCDL::SWaitcntOp::create(rewriter, loc, ldsOnlyBits); rewriter.replaceOpWithNewOp(op); + return success(); } else { - Location loc = op->getLoc(); - ROCDL::WaitDscntOp::create(rewriter, loc, 0); - ROCDL::BarrierSignalOp::create(rewriter, loc, -1); - rewriter.replaceOpWithNewOp(op, -1); + return op.emitOpError( + "don't know how to lower this for chipset major version") + << chipset.majorVersion; } - - return success(); } }; diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir index cc1162d8b0de8..d59f7fe3ba4c2 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir @@ -424,9 +424,8 @@ func.func @lds_barrier() { // GFX10-NEXT: rocdl.s.barrier // GFX11: llvm.inline_asm has_side_effects asm_dialect = att // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier" - // GFX12: rocdl.s.wait.dscnt 0 - // GFX12-NEXT: rocdl.s.barrier.signal -1 - // GFX12-NEXT: rocdl.s.barrier.wait -1 + // GFX12: llvm.inline_asm has_side_effects asm_dialect = att + // GFX12-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_wait_dscnt 0x0\0As_barrier_signal -1\0As_barrier_wait -1" amdgpu.lds_barrier func.return }