walk instead of dialect conversion

matthias-springer · matthias-springer · commit de9b2af0f06a · 2025-11-09T06:04:21.000Z
diff --git a/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h b/mlir/include/mlir/Conversion/ArithToAPFloat/ArithToAPFloat.h
@@ -12,17 +12,10 @@
 #include <memory>
 
 namespace mlir {
-
-class DialectRegistry;
-class RewritePatternSet;
 class Pass;
 
 #define GEN_PASS_DECL_ARITHTOAPFLOATCONVERSIONPASS
 #include "mlir/Conversion/Passes.h.inc"
-
-namespace arith {
-void populateArithToAPFloatConversionPatterns(RewritePatternSet &patterns);
-} // namespace arith
 } // namespace mlir
 
-#endif // MLIR_CONVERSION_ARITHTOAPFloat_ARITHTOAPFloat_H
+#endif // MLIR_CONVERSION_ARITHTOAPFLOAT_ARITHTOAPFLOAT_H
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
@@ -190,10 +190,13 @@ def ArithToLLVMConversionPass : Pass<"convert-arith-to-llvm"> {
 // ArithToAPFloat
 //===----------------------------------------------------------------------===//
 
-def ArithToAPFloatConversionPass : Pass<"convert-arith-to-apfloat"> {
-  let summary = "Convert Arith dialect ops on FP8 types to APFloat lib calls";
+def ArithToAPFloatConversionPass
+    : Pass<"convert-arith-to-apfloat", "ModuleOp"> {
+  let summary = "Convert Arith ops to APFloat runtime library calls";
   let description = [{
-    This pass converts supported Arith ops which manipulate FP8 typed values to APFloat lib calls.
+    This pass converts supported Arith ops to APFloat-based runtime library
+    calls (APFloatWrappers.cpp). APFloat is a software implementation of
+    floating-point arithmetic operations.
   }];
   let dependentDialects = ["func::FuncDialect"];
   let options = [];
diff --git a/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp b/mlir/lib/Conversion/ArithToAPFloat/ArithToAPFloat.cpp
@@ -13,7 +13,8 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Func/Utils/Utils.h"
 #include "mlir/IR/Verifier.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#include "llvm/ADT/TypeSwitch.h"
 
 namespace mlir {
 #define GEN_PASS_DEF_ARITHTOAPFLOATCONVERSIONPASS
@@ -23,100 +24,55 @@ namespace mlir {
 using namespace mlir;
 using namespace mlir::func;
 
-#define APFLOAT_BIN_OPS(X)                                                     \
-  X(add)                                                                       \
-  X(subtract)                                                                  \
-  X(multiply)                                                                  \
-  X(divide)                                                                    \
-  X(remainder)                                                                 \
-  X(mod)
-
-#define APFLOAT_EXTERN_K(OP) kApFloat_##OP
-
-#define APFLOAT_EXTERN_NAME(OP)                                                \
-  static constexpr llvm::StringRef APFLOAT_EXTERN_K(OP) = "_mlir_"             \
-                                                          "apfloat_" #OP;
-
-namespace mlir::func {
-#define LOOKUP_OR_CREATE_APFLOAT_FN_DECL(OP)                                   \
-  FailureOr<FuncOp> lookupOrCreateApFloat##OP##Fn(                             \
-      OpBuilder &b, Operation *moduleOp,                                       \
-      SymbolTableCollection *symbolTables = nullptr);
-
-APFLOAT_BIN_OPS(LOOKUP_OR_CREATE_APFLOAT_FN_DECL)
-
-#undef LOOKUP_OR_CREATE_APFLOAT_FN_DECL
-
-APFLOAT_BIN_OPS(APFLOAT_EXTERN_NAME)
-
-#define LOOKUP_OR_CREATE_APFLOAT_FN_DEFN(OP)                                   \
-  FailureOr<FuncOp> lookupOrCreateApFloat##OP##Fn(                             \
-      OpBuilder &b, Operation *moduleOp,                                       \
-      SymbolTableCollection *symbolTables) {                                   \
-    return lookupOrCreateFn(b, moduleOp, APFLOAT_EXTERN_K(OP),                 \
-                            {IntegerType::get(moduleOp->getContext(), 32),     \
-                             IntegerType::get(moduleOp->getContext(), 64),     \
-                             IntegerType::get(moduleOp->getContext(), 64)},    \
-                            {IntegerType::get(moduleOp->getContext(), 64)},    \
-                            /*setPrivate*/ true, symbolTables);                \
-  }
-
-APFLOAT_BIN_OPS(LOOKUP_OR_CREATE_APFLOAT_FN_DEFN)
-#undef LOOKUP_OR_CREATE_APFLOAT_FN_DEFN
-} // namespace mlir::func
-
-struct FancyAddFLowering : OpRewritePattern<arith::AddFOp> {
-  using OpRewritePattern::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(arith::AddFOp op,
-                                PatternRewriter &rewriter) const override {
-    // Get APFloat adder function from runtime library.
-    auto parent = op->getParentOfType<ModuleOp>();
-    if (!parent)
-      return failure();
-    if (!llvm::isa<Float8E5M2Type, Float8E4M3Type, Float8E4M3FNType,
-                   Float8E5M2FNUZType, Float8E4M3FNUZType,
-                   Float8E4M3B11FNUZType, Float8E3M4Type, Float4E2M1FNType,
-                   Float6E2M3FNType, Float6E3M2FNType, Float8E8M0FNUType>(
-            op.getType()))
-      return failure();
-    FailureOr<Operation *> adder = lookupOrCreateApFloataddFn(rewriter, parent);
-
-    // Cast operands to 64-bit integers.
-    Location loc = op.getLoc();
-    auto floatTy = cast<FloatType>(op.getType());
-    auto intWType = rewriter.getIntegerType(floatTy.getWidth());
-    auto int64Type = rewriter.getI64Type();
-    Value lhsBits = arith::ExtUIOp::create(
-        rewriter, loc, int64Type,
-        arith::BitcastOp::create(rewriter, loc, intWType, op.getLhs()));
-    Value rhsBits = arith::ExtUIOp::create(
-        rewriter, loc, int64Type,
-        arith::BitcastOp::create(rewriter, loc, intWType, op.getRhs()));
-
-    // Call software implementation of floating point addition.
-    int32_t sem =
-        llvm::APFloatBase::SemanticsToEnum(floatTy.getFloatSemantics());
-    Value semValue = arith::ConstantOp::create(
-        rewriter, loc, rewriter.getI32Type(),
-        rewriter.getIntegerAttr(rewriter.getI32Type(), sem));
-    SmallVector<Value> params = {semValue, lhsBits, rhsBits};
-    auto resultOp =
-        func::CallOp::create(rewriter, loc, TypeRange(rewriter.getI64Type()),
-                             SymbolRefAttr::get(*adder), params);
-
-    // Truncate result to the original width.
-    Value truncatedBits = arith::TruncIOp::create(rewriter, loc, intWType,
-                                                  resultOp->getResult(0));
-    rewriter.replaceAllUsesWith(
-        op, arith::BitcastOp::create(rewriter, loc, floatTy, truncatedBits));
-    return success();
-  }
-};
+/// Finds or declares private `_mlir_apfloat_<name>`: (i32, i64, i64) -> i64.
+static FailureOr<Operation *>
+lookupOrCreateBinaryFn(OpBuilder &b, Operation *moduleOp, StringRef name,
+                       SymbolTableCollection *symbolTables = nullptr) {
+  MLIRContext *ctx = moduleOp->getContext();
+  Type i32Ty = IntegerType::get(ctx, 32);
+  Type i64Ty = IntegerType::get(ctx, 64);
+  std::string symName = (llvm::Twine("_mlir_apfloat_") + name).str();
+  return lookupOrCreateFn(b, moduleOp, symName, {i32Ty, i64Ty, i64Ty}, {i64Ty},
+                          /*setPrivate*/ true, symbolTables);
+}
 
-void arith::populateArithToAPFloatConversionPatterns(
-    RewritePatternSet &patterns) {
-  patterns.add<FancyAddFLowering>(patterns.getContext());
+/// Lowers `op` to a call to `_mlir_apfloat_<apfloatName>` on i64 bit patterns.
+template <typename OpTy>
+static LogicalResult rewriteBinaryOp(RewriterBase &rewriter, ModuleOp module,
+                                     OpTy op, StringRef apfloatName) {
+  // Skip non-scalar types and widths >= 64 (extui/trunci need width < 64).
+  auto floatTy = dyn_cast<FloatType>(op.getType());
+  if (!floatTy || floatTy.getWidth() >= 64)
+    return success();
+  // Get APFloat function from runtime library.
+  FailureOr<Operation *> fn =
+      lookupOrCreateBinaryFn(rewriter, module, apfloatName);
+  if (failed(fn))
+    return op->emitError("failed to lookup or create APFloat function");
+  // Cast operands to 64-bit integers.
+  Location loc = op.getLoc();
+  auto intWType = rewriter.getIntegerType(floatTy.getWidth());
+  auto int64Type = rewriter.getI64Type();
+  Value lhsBits = arith::ExtUIOp::create(
+      rewriter, loc, int64Type,
+      arith::BitcastOp::create(rewriter, loc, intWType, op.getLhs()));
+  Value rhsBits = arith::ExtUIOp::create(
+      rewriter, loc, int64Type,
+      arith::BitcastOp::create(rewriter, loc, intWType, op.getRhs()));
+  // Call APFloat function; the first argument selects the float semantics.
+  int32_t sem = llvm::APFloatBase::SemanticsToEnum(floatTy.getFloatSemantics());
+  Value semValue = arith::ConstantOp::create(
+      rewriter, loc, rewriter.getI32Type(),
+      rewriter.getIntegerAttr(rewriter.getI32Type(), sem));
+  SmallVector<Value> params = {semValue, lhsBits, rhsBits};
+  auto resultOp = func::CallOp::create(rewriter, loc, TypeRange(int64Type),
+                                       SymbolRefAttr::get(*fn), params);
+  // Truncate result to the original width.
+  Value truncatedBits =
+      arith::TruncIOp::create(rewriter, loc, intWType, resultOp->getResult(0));
+  rewriter.replaceOp(
+      op, arith::BitcastOp::create(rewriter, loc, floatTy, truncatedBits));
+  return success();
 }
 
 namespace {
@@ -126,10 +82,31 @@ struct ArithToAPFloatConversionPass final
       ArithToAPFloatConversionPass>::ArithToAPFloatConversionPassBase;
 
   void runOnOperation() override {
-    Operation *op = getOperation();
-    RewritePatternSet patterns(op->getContext());
-    arith::populateArithToAPFloatConversionPatterns(patterns);
-    if (failed(applyPatternsGreedily(op, std::move(patterns))))
+    ModuleOp module = getOperation();
+    IRRewriter rewriter(getOperation()->getContext());
+    // Visit every op in the module and replace the supported arith float ops
+    // with calls to the matching `_mlir_apfloat_<name>` runtime function.
+    WalkResult status = module->walk([&](Operation *op) {
+      rewriter.setInsertionPoint(op);
+      LogicalResult result =
+          llvm::TypeSwitch<Operation *, LogicalResult>(op)
+              .Case<arith::AddFOp>([&](arith::AddFOp op) {
+                return rewriteBinaryOp(rewriter, module, op, "add");
+              })
+              .Case<arith::SubFOp>([&](arith::SubFOp op) {
+                return rewriteBinaryOp(rewriter, module, op, "subtract");
+              })
+              .Case<arith::MulFOp>([&](arith::MulFOp op) {
+                return rewriteBinaryOp(rewriter, module, op, "multiply");
+              })
+              .Case<arith::DivFOp>([&](arith::DivFOp op) {
+                return rewriteBinaryOp(rewriter, module, op, "divide");
+              })
+              .Default([](Operation *op) { return success(); });
+      // A failed rewrite aborts the walk so that the pass can signal failure.
+      return failed(result) ? WalkResult::interrupt() : WalkResult::advance();
+    });
+    if (status.wasInterrupted())
       return signalPassFailure();
   }
 };
diff --git a/mlir/lib/ExecutionEngine/APFloatWrappers.cpp b/mlir/lib/ExecutionEngine/APFloatWrappers.cpp
@@ -1,4 +1,4 @@
-//===- ArmRunnerUtils.cpp - Utilities for configuring architecture properties //
+//===- APFloatWrappers.cpp - Software Implementation of FP Arithmetics --- ===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -16,31 +16,29 @@
 #define MLIR_APFLOAT_WRAPPERS_EXPORTED __attribute__((visibility("default")))
 #endif
 
+/// Binary operations without rounding mode; the APFloat opStatus is ignored.
 #define APFLOAT_BINARY_OP(OP)                                                  \
-  int64_t MLIR_APFLOAT_WRAPPERS_EXPORTED APFloat_##OP(                         \
+  int64_t MLIR_APFLOAT_WRAPPERS_EXPORTED _mlir_apfloat_##OP(                   \
       int32_t semantics, uint64_t a, uint64_t b) {                             \
     const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics(        \
         static_cast<llvm::APFloatBase::Semantics>(semantics));                 \
     unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem);           \
     llvm::APFloat lhs(sem, llvm::APInt(bitWidth, a));                          \
     llvm::APFloat rhs(sem, llvm::APInt(bitWidth, b));                          \
-    llvm::APFloatBase::opStatus status = lhs.OP(rhs);                          \
-    assert(status == llvm::APFloatBase::opOK && "expected " #OP                \
-                                                " opstatus to be OK");         \
+    (void)lhs.OP(rhs);                                                         \
     return lhs.bitcastToAPInt().getZExtValue();                                \
   }
 
+/// Binary operations with rounding mode; the APFloat opStatus is ignored.
 #define APFLOAT_BINARY_OP_ROUNDING_MODE(OP, ROUNDING_MODE)                     \
-  int64_t MLIR_APFLOAT_WRAPPERS_EXPORTED APFloat_##OP(                         \
+  int64_t MLIR_APFLOAT_WRAPPERS_EXPORTED _mlir_apfloat_##OP(                   \
       int32_t semantics, uint64_t a, uint64_t b) {                             \
     const llvm::fltSemantics &sem = llvm::APFloatBase::EnumToSemantics(        \
         static_cast<llvm::APFloatBase::Semantics>(semantics));                 \
     unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem);           \
     llvm::APFloat lhs(sem, llvm::APInt(bitWidth, a));                          \
     llvm::APFloat rhs(sem, llvm::APInt(bitWidth, b));                          \
-    llvm::APFloatBase::opStatus status = lhs.OP(rhs, ROUNDING_MODE);           \
-    assert(status == llvm::APFloatBase::opOK && "expected " #OP                \
-                                                " opstatus to be OK");         \
+    (void)lhs.OP(rhs, ROUNDING_MODE);                                          \
     return lhs.bitcastToAPInt().getZExtValue();                                \
   }
 
@@ -68,6 +66,6 @@ void MLIR_APFLOAT_WRAPPERS_EXPORTED printApFloat(int32_t semantics,
   unsigned bitWidth = llvm::APFloatBase::semanticsSizeInBits(sem);
   llvm::APFloat x(sem, llvm::APInt(bitWidth, a));
   double d = x.convertToDouble();
-  std::cout << d << std::endl;
+  fprintf(stdout, "%lg", d);
 }
 }
diff --git a/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir b/mlir/test/Conversion/ArithToApfloat/arith-to-apfloat.mlir
@@ -0,0 +1,38 @@
+// RUN: mlir-opt %s --convert-arith-to-apfloat | FileCheck %s
+
+// CHECK-LABEL:   func.func private @_mlir_apfloat_add(i32, i64, i64) -> i64
+
+// CHECK-LABEL:   func.func @foo() -> f8E4M3FN {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 2.250000e+00 : f8E4M3FN
+// CHECK:           return %[[CONSTANT_0]] : f8E4M3FN
+// CHECK:         }
+
+// CHECK-LABEL:   func.func @entry() {
+// CHECK:           %[[cst:.*]] = arith.constant 1.375000e+00 : f8E4M3FN
+// CHECK:           %[[rhs:.*]] = call @foo() : () -> f8E4M3FN
+// CHECK:           %[[lhs_casted:.*]] = arith.bitcast %[[cst]] : f8E4M3FN to i8
+// CHECK:           %[[lhs_ext:.*]] = arith.extui %[[lhs_casted]] : i8 to i64
+// CHECK:           %[[rhs_casted:.*]] = arith.bitcast %[[rhs]] : f8E4M3FN to i8
+// CHECK:           %[[rhs_ext:.*]] = arith.extui %[[rhs_casted]] : i8 to i64
+// CHECK:           %[[c10_i32:.*]] = arith.constant 10 : i32
+// CHECK:           %[[res:.*]] = call @_mlir_apfloat_add(%[[c10_i32]], %[[lhs_ext]], %[[rhs_ext]]) : (i32, i64, i64) -> i64
+// CHECK:           %[[res_trunc:.*]] = arith.trunci %[[res]] : i64 to i8
+// CHECK:           %[[res_casted:.*]] = arith.bitcast %[[res_trunc]] : i8 to f8E4M3FN
+// CHECK:           vector.print %[[res_casted]] : f8E4M3FN
+// CHECK:           return
+// CHECK:         }
+
+// Return the rhs from a separate function so the addf is not constant-folded.
+func.func @foo() -> f8E4M3FN {
+  %cst = arith.constant 2.2 : f8E4M3FN
+  return %cst : f8E4M3FN
+}
+
+func.func @entry() {
+  %a = arith.constant 1.4 : f8E4M3FN
+  %b = func.call @foo() : () -> (f8E4M3FN)
+  %c = arith.addf %a, %b : f8E4M3FN
+  // The addf above is the op expected to become a @_mlir_apfloat_add call.
+  vector.print %c : f8E4M3FN
+  return
+}
diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-apfloat-emulation.mlir
@@ -1,38 +1,19 @@
-// Check that the ceildivsi lowering is correct.
-// We do not check any poison or UB values, as it is not possible to catch them.
-
-// RUN: mlir-opt %s --convert-arith-to-apfloat
+// RUN: mlir-opt %s --convert-arith-to-apfloat --convert-to-llvm | \
+// RUN:   mlir-runner -e entry --entry-point-result=void \
+// RUN:               --shared-libs=%mlir_c_runner_utils | FileCheck %s
 
 // Put rhs into separate function so that it won't be constant-folded.
-func.func @foo() -> f4E2M1FN {
-  %cst = arith.constant 5.0 : f4E2M1FN
-  return %cst : f4E2M1FN
+func.func @foo() -> f8E4M3FN {
+  %cst = arith.constant 2.2 : f8E4M3FN
+  return %cst : f8E4M3FN
 }
 
 func.func @entry() {
-  %a = arith.constant 5.0 : f4E2M1FN
-  %b = func.call @foo() : () -> (f4E2M1FN)
-  %c = arith.addf %a, %b : f4E2M1FN
-  vector.print %c : f4E2M1FN
+  %a = arith.constant 1.4 : f8E4M3FN
+  %b = func.call @foo() : () -> (f8E4M3FN)
+  %c = arith.addf %a, %b : f8E4M3FN
+  // 1.375 + 2.25 = 3.625 is not representable in f8E4M3FN; it rounds to 3.5.
+  // CHECK: 3.5
+  vector.print %c : f8E4M3FN
   return
 }
-
-// CHECK-LABEL:   func.func private @_mlir_apfloat_add(i32, i64, i64) -> i64
-
-// CHECK-LABEL:   func.func @foo() -> f4E2M1FN {
-// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 4.000000e+00 : f4E2M1FN
-// CHECK:           return %[[CONSTANT_0]] : f4E2M1FN
-// CHECK:         }
-
-// CHECK-LABEL:   func.func @entry() {
-// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 18 : i32
-// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 6 : i64
-// CHECK:           %[[VAL_0:.*]] = call @foo() : () -> f4E2M1FN
-// CHECK:           %[[BITCAST_0:.*]] = arith.bitcast %[[VAL_0]] : f4E2M1FN to i4
-// CHECK:           %[[EXTUI_0:.*]] = arith.extui %[[BITCAST_0]] : i4 to i64
-// CHECK:           %[[VAL_1:.*]] = call @_mlir_apfloat_add(%[[CONSTANT_0]], %[[EXTUI_0]], %[[CONSTANT_1]]) : (i32, i64, i64) -> i64
-// CHECK:           %[[TRUNCI_0:.*]] = arith.trunci %[[VAL_1]] : i64 to i4
-// CHECK:           %[[BITCAST_1:.*]] = arith.bitcast %[[TRUNCI_0]] : i4 to f4E2M1FN
-// CHECK:           vector.print %[[BITCAST_1]] : f4E2M1FN
-// CHECK:           return
-// CHECK:         }