replace CRTP with a simple trait in a common flow

ftynse · ftynse · commit cd3148548e2d · 2025-01-20T17:36:04.000+01:00
diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -16,11 +16,51 @@
 
 namespace mlir {
 
-template <typename SourceOp, typename DerivedTy>
-struct OpToFuncCallLoweringBase : public ConvertOpToLLVMPattern<SourceOp> {
+namespace {
+/// Detection trait tor the `getFastmath` instance method.
+template <typename T>
+using has_get_fastmath_t = decltype(std::declval<T>().getFastmath());
+} // namespace
+
+/// Rewriting that replaces SourceOp with a CallOp to `f32Func` or `f64Func` or
+/// `f32ApproxFunc` or `f16Func` or `i32Type` depending on the element type and
+/// the fastMathFlag of that Op, if present. The function declaration is added
+/// in case it was not added before.
+///
+/// If the input values are of bf16 type (or f16 type if f16Func is empty), the
+/// value is first casted to f32, the function called and then the result casted
+/// back.
+///
+/// Example with NVVM:
+///   %exp_f32 = math.exp %arg_f32 : f32
+///
+/// will be transformed into
+///   llvm.call @__nv_expf(%arg_f32) : (f32) -> f32
+///
+/// If the fastMathFlag attribute of SourceOp is `afn` or `fast`, this Op lowers
+/// to the approximate calculation function.
+///
+/// Also example with NVVM:
+///   %exp_f32 = math.exp %arg_f32 fastmath<afn> : f32
+///
+/// will be transformed into
+///   llvm.call @__nv_fast_expf(%arg_f32) : (f32) -> f32
+///
+/// Final example with NVVM:
+///   %pow_f32 = math.fpowi %arg_f32, %arg_i32
+///
+/// will be transformed into
+///   llvm.call @__nv_powif(%arg_f32, %arg_i32) : (f32, i32) -> f32
+template <typename SourceOp>
+struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
 public:
-  explicit OpToFuncCallLoweringBase(const LLVMTypeConverter &lowering)
-      : ConvertOpToLLVMPattern<SourceOp>(lowering) {}
+  explicit OpToFuncCallLowering(const LLVMTypeConverter &lowering,
+                                StringRef f32Func, StringRef f64Func,
+                                StringRef f32ApproxFunc, StringRef f16Func,
+                                StringRef i32Func = "")
+      : ConvertOpToLLVMPattern<SourceOp>(lowering), f32Func(f32Func),
+        f64Func(f64Func), f32ApproxFunc(f32ApproxFunc), f16Func(f16Func),
+        i32Func(i32Func) {}
 
   LogicalResult
   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
@@ -46,15 +86,12 @@ struct OpToFuncCallLoweringBase : public ConvertOpToLLVMPattern<SourceOp> {
 
     SmallVector<Value, 1> castedOperands;
     for (Value operand : adaptor.getOperands())
-      castedOperands.push_back(
-          static_cast<const DerivedTy *>(this)->maybeCast(operand, rewriter));
+      castedOperands.push_back(maybeCast(operand, rewriter));
 
     Type resultType = castedOperands.front().getType();
     Type funcType = getFunctionType(resultType, castedOperands);
-    StringRef funcName =
-        static_cast<const DerivedTy *>(this)
-            ->getFunctionName(
-                cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
+    StringRef funcName = getFunctionName(
+        cast<LLVM::LLVMFunctionType>(funcType).getReturnType(), op);
     if (funcName.empty())
       return failure();
 
@@ -67,14 +104,28 @@ struct OpToFuncCallLoweringBase : public ConvertOpToLLVMPattern<SourceOp> {
       return success();
     }
 
+    assert(callOp.getResult().getType().isF32() &&
+           "only f32 types are supposed to be truncated back");
     Value truncated = rewriter.create<LLVM::FPTruncOp>(
         op->getLoc(), adaptor.getOperands().front().getType(),
         callOp.getResult());
     rewriter.replaceOp(op, {truncated});
     return success();
   }
 
-private:
+  Value maybeCast(Value operand, PatternRewriter &rewriter) const {
+    Type type = operand.getType();
+    if (!isa<Float16Type, BFloat16Type>(type))
+      return operand;
+
+    // if there's a f16 function, no need to cast f16 values
+    if (!f16Func.empty() && isa<Float16Type>(type))
+      return operand;
+
+    return rewriter.create<LLVM::FPExtOp>(
+        operand.getLoc(), Float32Type::get(rewriter.getContext()), operand);
+  }
+
   Type getFunctionType(Type resultType, ValueRange operands) const {
     SmallVector<Type> operandTypes(operands.getTypes());
     return LLVM::LLVMFunctionType::get(resultType, operandTypes);
@@ -85,7 +136,8 @@ struct OpToFuncCallLoweringBase : public ConvertOpToLLVMPattern<SourceOp> {
     using LLVM::LLVMFuncOp;
 
     auto funcAttr = StringAttr::get(op->getContext(), funcName);
-    auto funcOp = SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
+    auto funcOp =
+        SymbolTable::lookupNearestSymbolFrom<LLVMFuncOp>(op, funcAttr);
     if (funcOp)
       return funcOp;
 
@@ -94,153 +146,37 @@ struct OpToFuncCallLoweringBase : public ConvertOpToLLVMPattern<SourceOp> {
     OpBuilder b(parentFunc);
     return b.create<LLVMFuncOp>(op->getLoc(), funcName, funcType);
   }
-};
-
-/// Rewriting that replaces SourceOp with a CallOp to `f32Func` or `f64Func` or
-/// `f32ApproxFunc` or `f16Func` depending on the element type and the
-/// fastMathFlag of that Op. The function declaration is added in case it was
-/// not added before.
-///
-/// If the input values are of bf16 type (or f16 type if f16Func is empty), the
-/// value is first casted to f32, the function called and then the result casted
-/// back.
-///
-/// Example with NVVM:
-///   %exp_f32 = math.exp %arg_f32 : f32
-///
-/// will be transformed into
-///   llvm.call @__nv_expf(%arg_f32) : (f32) -> f32
-///
-/// If the fastMathFlag attribute of SourceOp is `afn` or `fast`, this Op lowers
-/// to the approximate calculation function.
-///
-/// Also example with NVVM:
-///   %exp_f32 = math.exp %arg_f32 fastmath<afn> : f32
-///
-/// will be transformed into
-///   llvm.call @__nv_fast_expf(%arg_f32) : (f32) -> f32
-template <typename SourceOp>
-struct OpToFuncCallLowering
-    : public OpToFuncCallLoweringBase<SourceOp,
-                                      OpToFuncCallLowering<SourceOp>> {
-public:
-  explicit OpToFuncCallLowering(const LLVMTypeConverter &lowering,
-                                StringRef f32Func, StringRef f64Func,
-                                StringRef f32ApproxFunc, StringRef f16Func)
-      : OpToFuncCallLoweringBase<SourceOp, OpToFuncCallLowering<SourceOp>>(
-            lowering),
-        f32Func(f32Func), f64Func(f64Func), f32ApproxFunc(f32ApproxFunc),
-        f16Func(f16Func) {}
-
-  Value maybeCast(Value operand, PatternRewriter &rewriter) const {
-    Type type = operand.getType();
-    if (!isa<Float16Type, BFloat16Type>(type))
-      return operand;
-
-    // if there's a f16 function, no need to cast f16 values
-    if (!f16Func.empty() && isa<Float16Type>(type))
-      return operand;
-
-    return rewriter.create<LLVM::FPExtOp>(
-        operand.getLoc(), Float32Type::get(rewriter.getContext()), operand);
-  }
 
   StringRef getFunctionName(Type type, SourceOp op) const {
-    arith::FastMathFlags flag = op.getFastmath();
+    bool useApprox = false;
+    if constexpr (llvm::is_detected<has_get_fastmath_t, SourceOp>::value) {
+      arith::FastMathFlags flag = op.getFastmath();
+      useApprox = ((uint32_t)arith::FastMathFlags::afn & (uint32_t)flag) &&
+                  !f32ApproxFunc.empty();
+    }
+
     if (isa<Float16Type>(type))
       return f16Func;
     if (isa<Float32Type>(type)) {
-      if (((uint32_t)arith::FastMathFlags::afn & (uint32_t)flag) &&
-          !f32ApproxFunc.empty())
+      if (useApprox)
         return f32ApproxFunc;
-      else
-        return f32Func;
+      return f32Func;
     }
     if (isa<Float64Type>(type))
       return f64Func;
+
+    if (type.isInteger(32))
+      return i32Func;
     return "";
   }
 
   const std::string f32Func;
   const std::string f64Func;
   const std::string f32ApproxFunc;
   const std::string f16Func;
-};
-
-/// Rewriting that replace SourceOp with a CallOp to `i32Func`
-/// The function declaration is added in case it was not added before.
-/// This assumes that all types integral.
-///
-/// Example with NVVM:
-///   %abs_i32 = math.iabs %arg_i32 : i32
-///
-/// will be transformed into
-///   llvm.call @__nv_abs(%arg_i32) : (i32) -> i32
-///
-template <typename SourceOp>
-struct IntOpToFuncCallLowering
-    : public OpToFuncCallLoweringBase<SourceOp,
-                                      IntOpToFuncCallLowering<SourceOp>> {
-public:
-  explicit IntOpToFuncCallLowering(const LLVMTypeConverter &lowering,
-                                   StringRef i32Func)
-      : OpToFuncCallLoweringBase<SourceOp, IntOpToFuncCallLowering<SourceOp>>(
-            lowering),
-        i32Func(i32Func) {}
-
-  Value maybeCast(Value operand, PatternRewriter &rewriter) const {
-    return operand;
-  }
-
-  StringRef getFunctionName(Type type, SourceOp op) const {
-    IntegerType itype = dyn_cast<IntegerType>(type);
-    if (!itype || itype.getWidth() != 32)
-      return "";
-    return i32Func;
-  }
-
   const std::string i32Func;
 };
 
-/// Rewriting that replaces SourceOp with a CallOp to `f32Func` or `f64Func`,
-/// depending on the type of the result. This assumes that the first argument is
-/// a floating type and the second argument is an integer type.
-///
-/// Example with NVVM:
-///   %result32 = math.fpowi %arg_f32, %arg_i32 : f32, i32
-///
-/// will be transformed into
-///   llvm.call @__nv_powf(%arg_f32, %arg_i32) : (f32, i32) -> f32
-///
-template <typename SourceOp>
-struct FloatIntOpToFuncCallLowering
-    : public OpToFuncCallLoweringBase<SourceOp,
-                                      FloatIntOpToFuncCallLowering<SourceOp>> {
-public:
-  explicit FloatIntOpToFuncCallLowering(const LLVMTypeConverter &lowering,
-                                        StringRef f32Func, StringRef f64Func)
-      : OpToFuncCallLoweringBase<SourceOp,
-                                 FloatIntOpToFuncCallLowering<SourceOp>>(
-            lowering),
-        f32Func(f32Func), f64Func(f64Func) {}
-
-  Value maybeCast(Value operand, PatternRewriter &rewriter) const {
-    return operand;
-  }
-
-  StringRef getFunctionName(Type type, SourceOp op) const {
-    if (isa<Float32Type>(type)) {
-      return f32Func;
-    }
-    if (isa<Float64Type>(type))
-      return f64Func;
-    return "";
-  }
-
-  const std::string f32Func;
-  const std::string f64Func;
-};
-
 } // namespace mlir
 
 #endif // MLIR_CONVERSION_GPUCOMMON_OPTOFUNCCALLLOWERING_H_
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -451,15 +451,15 @@ static void populateIntOpPatterns(const LLVMTypeConverter &converter,
                                   RewritePatternSet &patterns,
                                   StringRef i32Func) {
   patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
-  patterns.add<IntOpToFuncCallLowering<OpTy>>(converter, i32Func);
+  patterns.add<OpToFuncCallLowering<OpTy>>(converter, "", "", "", "", i32Func);
 }
 
 template <typename OpTy>
 static void populateFloatIntOpPatterns(const LLVMTypeConverter &converter,
                                        RewritePatternSet &patterns,
                                        StringRef f32Func, StringRef f64Func) {
   patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
-  patterns.add<FloatIntOpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func);
+  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func, "", "");
 }
 
 void mlir::populateGpuSubgroupReduceOpLoweringPattern(