From 8ea8439e47a122d7336709e11ff35bca84ff4b9b Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Wed, 18 Jun 2025 12:25:19 +0200 Subject: [PATCH 1/3] [MLIR] Mark LLVM::FMAOp as legal since we can lower to NVVM --- .../Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 8 ++++---- mlir/test/Integration/GPU/CUDA/dump-ptx.mlir | 13 ++++++++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 958d0d085fce1..cef250232daf5 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -429,10 +429,10 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { target.addLegalDialect<::mlir::NVVM::NVVMDialect>(); target.addIllegalDialect(); target.addIllegalOp(); + LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp, + LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp, + LLVM::RoundEvenOp, LLVM::RoundOp, LLVM::SinOp, + LLVM::SqrtOp>(); // TODO: Remove once we support replacing non-root ops. target.addLegalOp(); diff --git a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir index 0cc5d8645bb36..fdc53ff0c4160 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir @@ -2,7 +2,7 @@ // RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \ // RUN: 2>&1 | FileCheck %s -// CHECK: Generated by LLVM NVPTX Back-End +// CHECK-LABEL: Generated by LLVM NVPTX Back-End // CHECK: .visible .func kernel_a() // CHECK: ret; gpu.module @bar { @@ -11,3 +11,14 @@ gpu.module @bar { llvm.return } } + +// CHECK-LABEL: Generated by LLVM NVPTX Back-End +// CHECK: fma.rn.f32 + +gpu.module @foo { + llvm.func @fma(%arg0: f32, %arg1: f32) -> f32 + attributes { gpu.kernel } { + %res = llvm.intr.fma (%arg0, %arg1, %arg1) : (f32, f32, f32) -> f32 + llvm.return %res : f32 + } +} From 341820e33c4769ea1d2526095d55fa16e40e85ee Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Wed, 18 Jun 2025 12:45:42 +0200 Subject: [PATCH 2/3] improve --- mlir/test/Integration/GPU/CUDA/dump-ptx.mlir | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir index fdc53ff0c4160..27ec1ec435fef 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir +++ b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir @@ -13,6 +13,7 @@ gpu.module @bar { } // CHECK-LABEL: Generated by LLVM NVPTX Back-End +// CHECK: .visible .func ({{.+}}) fma( // CHECK: fma.rn.f32 gpu.module @foo { From 870f194e2cf9c685ef496bc7f903f52a6722e558 Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Wed, 18 Jun 2025 12:59:31 +0200 Subject: [PATCH 3/3] improve --- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 6d4555e815b66..ef06af3ad3163 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1027,7 +1027,7 @@ module attributes {transform.with_named_sequence} { legal_ops = ["func.func", "gpu.module", "gpu.yield"], illegal_dialects = ["gpu"], illegal_ops = ["llvm.copysign", "llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil", - "llvm.ffloor", "llvm.fma", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow", + "llvm.ffloor", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow", "llvm.roundeven", "llvm.round", "llvm.sin", "llvm.sqrt"], partial_conversion } : !transform.any_op