From 9c2511465f7431e09de4743d1b2b4ee043b69ae3 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Wed, 14 May 2025 17:31:49 +0200 Subject: [PATCH 1/4] [MLIR][NVVM] Add `inline_ptx` op This op allows using PTX directly within the NVVM dialect, while greatly simplifying llvm.inline_asm generation. Example 1: Read-only Parameters ```mlir nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count) : !llvm.ptr, i32 // Lowers to: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.init.b64 [$0], $1;", "l,r" %arg0, %arg2 : (!llvm.ptr, i32) -> () ``` Example 2: Read-only and Write-only Parameters ```mlir %0 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input) : f32 -> f32 // Lowers to: %0 = llvm.inline_asm has_side_effects asm_dialect = att "ex2.approx.ftz.f32 $0, $1;", "=f,f" %arg0 : (f32) -> f32 ``` Example 3: Predicate Usage ```mlir nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count), predicate = %pred : !llvm.ptr, i32, i1 // Lowers to: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" %arg0, %arg2, %arg3 : (!llvm.ptr, i32, i1) -> () ``` --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 70 +++++++++++++++++++ .../Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 25 +++++++ 2 files changed, 95 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 654aff71f25be..4ba54fa3c1ca7 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -236,6 +236,76 @@ foreach index = !range(0, 32) in { def NVVM_EnvReg # index # Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.envreg" # index>; } +//===----------------------------------------------------------------------===// +// Inline PTX op definition +//===----------------------------------------------------------------------===// + +def NVVM_InlinePtxOP : NVVM_Op<"inline_ptx", + [DeclareOpInterfaceMethods, + AttrSizedOperandSegments]> +{ + let summary =
"Inline PTX Op"; + let description = [{This op allows using PTX directly within the NVVM + dialect, while greatly simplifying llvm.inline_asm generation. It + automatically handles register size selection and sets the correct + read/write access for each operand. The operation leverages the + `BasicPtxBuilderInterface` to abstract away low-level details of + PTX assembly formatting. + + The optional `predicate` operand specifies a predicate that guards + the PTX instruction (see Example 3). + + Example 1: Read-only Parameters + ```mlir + nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count) : !llvm.ptr, i32 + + // Lowers to: + llvm.inline_asm has_side_effects asm_dialect = att + "mbarrier.init.b64 [$0], $1;", "l,r" %arg0, %arg2 : (!llvm.ptr, i32) -> () + ``` + + Example 2: Read-only and Write-only Parameters + ```mlir + %0 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input) : f32 -> f32 + + // Lowers to: + %0 = llvm.inline_asm has_side_effects asm_dialect = att + "ex2.approx.ftz.f32 $0, $1;", "=f,f" %arg0 : (f32) -> f32 + ``` + + Example 3: Predicate Usage + ```mlir + nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count), + predicate = %pred : !llvm.ptr, i32, i1 + + // Lowers to: + llvm.inline_asm has_side_effects asm_dialect = att + "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" %arg0, %arg2, %arg3 + : (!llvm.ptr, i32, i1) -> () + ``` + }]; + + let arguments = (ins Variadic:$readOnlyArgs, + StrAttr:$ptxCode, + PtxPredicate:$predicate); + + let results = (outs Variadic:$writeOnlyArgs); + + let assemblyFormat = [{ + $ptxCode `(` $readOnlyArgs `)` + (`,` `predicate` `=` $predicate^)? attr-dict + `:` type(operands) + (`->` type($writeOnlyArgs)^)?
+ }]; + + let extraClassDefinition = [{ + std::string $cppClass::getPtx() { + StringRef eventName = getPtxCode(); + return std::string(eventName.data()); + } + }]; +} + //===----------------------------------------------------------------------===// // NVVM approximate op definitions //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index c7a6eca158276..1d9164ac94d76 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -680,3 +680,28 @@ llvm.func @llvm_nvvm_barrier_arrive(%barID : i32, %numberOfThreads : i32) { nvvm.barrier.arrive id = %barID number_of_threads = %numberOfThreads llvm.return } + + +// ----- + +llvm.func @init_mbarrier( + %barrier_gen : !llvm.ptr, + %barrier : !llvm.ptr<3>, + %count : i32, + %pred : i1) { + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.init.b64 [$0], $1;", "l,r" + nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count) : !llvm.ptr, i32 + // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" + nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count), predicate = %pred : !llvm.ptr, i32, i1 + llvm.return +} +// ----- + +llvm.func @ex2(%input : f32, %pred : i1) { + // CHECK: %{{.*}} = llvm.inline_asm has_side_effects asm_dialect = att "ex2.approx.ftz.f32 $0, $1;", "=f,f" %{{.*}} : (f32) -> f32 + %0 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input) : f32 -> f32 + + // CHECK: %{{.*}} = llvm.inline_asm has_side_effects asm_dialect = att "@$1 ex2.approx.ftz.f32 $0, $1;", "=f,f,b" %{{.*}}, %{{.*}} : (f32, i1) -> f32 + %1 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input), predicate = %pred : f32, i1 -> f32 + llvm.return +} \ No newline at end of file From 65dc6f2b7420306fe2fcd8de1b45549e9f7a4726 Mon Sep 17 00:00:00 2001 
From: Guray Ozen Date: Wed, 14 May 2025 17:35:25 +0200 Subject: [PATCH 2/4] [MLIR][NVVM] Add missing newline at end of nvvm-to-llvm.mlir --- mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 1d9164ac94d76..8d720ce62a91b 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -704,4 +704,4 @@ llvm.func @ex2(%input : f32, %pred : i1) { // CHECK: %{{.*}} = llvm.inline_asm has_side_effects asm_dialect = att "@$1 ex2.approx.ftz.f32 $0, $1;", "=f,f,b" %{{.*}}, %{{.*}} : (f32, i1) -> f32 %1 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input), predicate = %pred : f32, i1 -> f32 llvm.return -} \ No newline at end of file +} From 57988547c961fb8bb7d06456ec6a9d8d3a028ecd Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Thu, 15 May 2025 13:24:43 +0200 Subject: [PATCH 3/4] [MLIR][NVVM] Rename NVVM_InlinePtxOP to NVVM_InlinePtxOp Co-authored-by: Mehdi Amini --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 4ba54fa3c1ca7..9f9b24395107e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -240,7 +240,7 @@ foreach index = !range(0, 32) in { // Inline PTX op definition //===----------------------------------------------------------------------===// -def NVVM_InlinePtxOP : NVVM_Op<"inline_ptx", +def NVVM_InlinePtxOp : NVVM_Op<"inline_ptx", [DeclareOpInterfaceMethods, AttrSizedOperandSegments]> { From 71634369b08b4e06af11855caa0a1eaeec981c48 Mon Sep 17 00:00:00 2001 From: Guray Ozen Date: Thu, 15 May 2025 13:28:56 +0200 Subject: [PATCH 4/4] Update NVVMOps.td --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 9f9b24395107e..a8e7dcb54ac20 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -300,8 +300,8 @@ def NVVM_InlinePtxOp : NVVM_Op<"inline_ptx",
 
   let extraClassDefinition = [{
     std::string $cppClass::getPtx() {
-      StringRef eventName = getPtxCode();
-      return std::string(eventName.data());
+      // Return the PTX text; str() copies by length (data() may lack a NUL).
+      return getPtxCode().str();
     }
   }];
 }