Address code review comments

AmrDeveloper · AmrDeveloper · commit 6879b7063c08 · 2025-08-16T15:51:51.000+02:00
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -2985,8 +2985,8 @@ def CIR_ComplexMulOp : CIR_Op<"complex.mul", [
     The `cir.complex.mul` operation takes two complex numbers and returns
     their product.
 
-    Range is used to select the implementation used when the operation
-    is lowered to the LLVM dialect. For multiplication, 'improved',
+    The `range` attribute is used to select the algorithm used when the
+    operation is lowered to the LLVM dialect. For multiplication, 'improved',
     'promoted', and 'basic' are all handled equivalently, producing the
     algebraic formula with no special handling for NaN value. If 'full' is
     used, a runtime-library function is called if one of the intermediate
@@ -3019,15 +3019,19 @@ def CIR_ComplexDivOp : CIR_Op<"complex.div", [
   let summary = "Complex division";
   let description = [{
     The `cir.complex.div` operation takes two complex numbers and returns
-    their division.
-
-    Range is used to select the implementation used when the operation
-    is lowered to the LLVM dialect. For division, 'improved' and
-    'promoted' are all handled equivalently, producing the
-    Smith's algorithms for Complex division. If 'full' is used,
-    a runtime-library function is called if one of the intermediate
-    calculations produced a NaN value, and for 'basic' algebraic formula with
-    no special handling for NaN value will be used.
+    their quotient.
+
+    The `range` attribute is used to select the algorithm used when
+    the operation is lowered to the LLVM dialect. For division, 'improved'
+    selects Smith's algorithm for complex division, with no special
+    handling for NaN values. If 'promoted' is used, the values are promoted
+    to a higher precision type, if possible, and the calculation is performed
+    using the algebraic formula. We only fall back on Smith's algorithm when
+    the target does not support a higher precision type. Also, this only
+    applies to floating-point types with no special handling for NaN values.
+    If 'full' is used, a runtime-library function is called if one of the
+    intermediate calculations produced a NaN value. For 'basic', the algebraic
+    formula with no special handling for NaN values is used.
 
     Example:
 
@@ -3040,8 +3044,7 @@ def CIR_ComplexDivOp : CIR_Op<"complex.div", [
   let arguments = (ins
     CIR_ComplexType:$lhs,
     CIR_ComplexType:$rhs,
-    CIR_ComplexRangeKind:$range,
-    UnitAttr:$promoted
+    CIR_ComplexRangeKind:$range
   );
 
   let results = (outs CIR_ComplexType:$result);
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -10,7 +10,6 @@ namespace {
 class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> {
   CIRGenFunction &cgf;
   CIRGenBuilderTy &builder;
-  bool fpHasBeenPromoted = false;
 
 public:
   explicit ComplexExprEmitter(CIRGenFunction &cgf)
@@ -131,43 +130,9 @@ class ComplexExprEmitter : public StmtVisitor<ComplexExprEmitter, mlir::Value> {
   mlir::Value emitBinMul(const BinOpInfo &op);
   mlir::Value emitBinDiv(const BinOpInfo &op);
 
-  QualType higherPrecisionTypeForComplexArithmetic(QualType elementType,
-                                                   bool isDivOpCode) {
-    ASTContext &astContext = cgf.getContext();
-    const QualType higherElementType =
-        astContext.GetHigherPrecisionFPType(elementType);
-    const llvm::fltSemantics &elementTypeSemantics =
-        astContext.getFloatTypeSemantics(elementType);
-    const llvm::fltSemantics &higherElementTypeSemantics =
-        astContext.getFloatTypeSemantics(higherElementType);
-
-    // Check that the promoted type can handle the intermediate values without
-    // overflowing. This can be interpreted as:
-    // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <=
-    // LargerType.LargestFiniteVal.
-    // In terms of exponent it gives this formula:
-    // (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal
-    // doubles the exponent of SmallerType.LargestFiniteVal)
-    if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <=
-        llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics)) {
-      fpHasBeenPromoted = true;
-      return astContext.getComplexType(higherElementType);
-    }
-
-    // The intermediate values can't be represented in the promoted type
-    // without overflowing.
-    return QualType();
-  }
-
   QualType getPromotionType(QualType ty, bool isDivOpCode = false) {
     if (auto *complexTy = ty->getAs<ComplexType>()) {
       QualType elementTy = complexTy->getElementType();
-      if (isDivOpCode && elementTy->isFloatingType() &&
-          cgf.getLangOpts().getComplexRange() ==
-              LangOptions::ComplexRangeKind::CX_Promoted) {
-        return higherPrecisionTypeForComplexArithmetic(elementTy, isDivOpCode);
-      }
-
       if (elementTy.UseExcessPrecision(cgf.getContext()))
         return cgf.getContext().getComplexType(cgf.getContext().FloatTy);
     }
@@ -896,8 +861,7 @@ mlir::Value ComplexExprEmitter::emitBinDiv(const BinOpInfo &op) {
       mlir::isa<cir::ComplexType>(op.rhs.getType())) {
     cir::ComplexRangeKind rangeKind =
         getComplexRangeAttr(op.fpFeatures.getComplexRange());
-    return builder.create<cir::ComplexDivOp>(op.loc, op.lhs, op.rhs, rangeKind,
-                                             fpHasBeenPromoted);
+    return builder.create<cir::ComplexDivOp>(op.loc, op.lhs, op.rhs, rangeKind);
   }
 
   cgf.cgm.errorNYI("ComplexExprEmitter::emitBinMu between Complex & Scalar");
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -8,6 +8,7 @@
 
 #include "PassDetail.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIROpsEnums.h"
@@ -312,22 +313,125 @@ buildRangeReductionComplexDiv(CIRBaseBuilderTy &builder, mlir::Location loc,
   return ternary.getResult();
 }
 
-static mlir::Value lowerComplexDiv(LoweringPreparePass &pass,
-                                   CIRBaseBuilderTy &builder,
-                                   mlir::Location loc, cir::ComplexDivOp op,
-                                   mlir::Value lhsReal, mlir::Value lhsImag,
-                                   mlir::Value rhsReal, mlir::Value rhsImag) {
+/// Returns the next wider CIR floating-point type whose exponent range can
+/// hold the intermediate values of complex arithmetic on `elementType`, or a
+/// null type when no such type exists. Float formats are taken from the
+/// target info of `cc`. `builder` is currently unused.
+static mlir::Type higherPrecisionElementTypeForComplexArithmetic(
+    mlir::MLIRContext &context, clang::ASTContext &cc,
+    CIRBaseBuilderTy &builder, mlir::Type elementType) {
+  auto getHigherPrecisionFPType = [&context](mlir::Type type) -> mlir::Type {
+    if (mlir::isa<cir::FP16Type>(type))
+      return cir::SingleType::get(&context);
+
+    if (mlir::isa<cir::SingleType>(type) || mlir::isa<cir::BF16Type>(type))
+      return cir::DoubleType::get(&context);
+
+    if (mlir::isa<cir::DoubleType>(type))
+      return cir::LongDoubleType::get(&context, type);
+    // No wider type is known: returning the input makes the check below fail.
+    return type;
+  };
+
+  auto getFloatTypeSemantics =
+      [&cc](mlir::Type type) -> const llvm::fltSemantics & {
+    const clang::TargetInfo &info = cc.getTargetInfo();
+    if (mlir::isa<cir::FP16Type>(type))
+      return info.getHalfFormat();
+
+    if (mlir::isa<cir::BF16Type>(type))
+      return info.getBFloat16Format();
+
+    if (mlir::isa<cir::SingleType>(type))
+      return info.getFloatFormat();
+
+    if (mlir::isa<cir::DoubleType>(type))
+      return info.getDoubleFormat();
+
+    if (mlir::isa<cir::LongDoubleType>(type)) {
+      if (cc.getLangOpts().OpenMP && cc.getLangOpts().OpenMPIsTargetDevice)
+        llvm_unreachable("NYI Float type semantics with OpenMP");
+      return info.getLongDoubleFormat();
+    }
+
+    if (mlir::isa<cir::FP128Type>(type)) {
+      if (cc.getLangOpts().OpenMP && cc.getLangOpts().OpenMPIsTargetDevice)
+        llvm_unreachable("NYI Float type semantics with OpenMP");
+      return info.getFloat128Format();
+    }
+    // Not assert(false): with NDEBUG that would fall off the end (UB).
+    llvm_unreachable("Unsupported float type semantics");
+  };
+
+  const mlir::Type higherElementType = getHigherPrecisionFPType(elementType);
+  const llvm::fltSemantics &elementTypeSemantics =
+      getFloatTypeSemantics(elementType);
+  const llvm::fltSemantics &higherElementTypeSemantics =
+      getFloatTypeSemantics(higherElementType);
+
+  // Promote only if the wider type can hold the intermediate values without
+  // overflowing, i.e.
+  //   (SmallerType.LargestFiniteVal * SmallerType.LargestFiniteVal) * 2 <=
+  //       LargerType.LargestFiniteVal.
+  // Squaring doubles the exponent and the factor of 2 adds one more.
+  if (llvm::APFloat::semanticsMaxExponent(elementTypeSemantics) * 2 + 1 <=
+      llvm::APFloat::semanticsMaxExponent(higherElementTypeSemantics))
+    return higherElementType;
+
+  // The intermediate values can't be represented in the promoted type
+  // without overflowing.
+  return {};
+}
+
+static mlir::Value
+lowerComplexDiv(LoweringPreparePass &pass, CIRBaseBuilderTy &builder,
+                mlir::Location loc, cir::ComplexDivOp op, mlir::Value lhsReal,
+                mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag,
+                mlir::MLIRContext &mlirCx, clang::ASTContext &cc) {
   cir::ComplexType complexTy = op.getType();
   if (mlir::isa<cir::FPTypeInterface>(complexTy.getElementType())) {
     cir::ComplexRangeKind range = op.getRange();
-    if (range == cir::ComplexRangeKind::Improved ||
-        (range == cir::ComplexRangeKind::Promoted && !op.getPromoted()))
+    if (range == cir::ComplexRangeKind::Improved)
       return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag,
                                            rhsReal, rhsImag);
+
     if (range == cir::ComplexRangeKind::Full)
       return buildComplexBinOpLibCall(pass, builder, &getComplexDivLibCallName,
                                       loc, complexTy, lhsReal, lhsImag, rhsReal,
                                       rhsImag);
+
+    if (range == cir::ComplexRangeKind::Promoted) {
+      mlir::Type originalElementType = complexTy.getElementType();
+      mlir::Type higherPrecisionElementType =
+          higherPrecisionElementTypeForComplexArithmetic(mlirCx, cc, builder,
+                                                         originalElementType);
+
+      if (!higherPrecisionElementType)
+        return buildRangeReductionComplexDiv(builder, loc, lhsReal, lhsImag,
+                                             rhsReal, rhsImag);
+
+      cir::CastKind floatingCastKind = cir::CastKind::floating;
+      lhsReal = builder.createCast(floatingCastKind, lhsReal,
+                                   higherPrecisionElementType);
+      lhsImag = builder.createCast(floatingCastKind, lhsImag,
+                                   higherPrecisionElementType);
+      rhsReal = builder.createCast(floatingCastKind, rhsReal,
+                                   higherPrecisionElementType);
+      rhsImag = builder.createCast(floatingCastKind, rhsImag,
+                                   higherPrecisionElementType);
+
+      mlir::Value algebraicResult = buildAlgebraicComplexDiv(
+          builder, loc, lhsReal, lhsImag, rhsReal, rhsImag);
+
+      mlir::Value resultReal = builder.createComplexReal(loc, algebraicResult);
+      mlir::Value resultImag = builder.createComplexImag(loc, algebraicResult);
+
+      mlir::Value finalReal =
+          builder.createCast(floatingCastKind, resultReal, originalElementType);
+      mlir::Value finalImag =
+          builder.createCast(floatingCastKind, resultImag, originalElementType);
+      return builder.createComplexCreate(loc, finalReal, finalImag);
+    }
   }
 
   return buildAlgebraicComplexDiv(builder, loc, lhsReal, lhsImag, rhsReal,
@@ -345,8 +449,9 @@ void LoweringPreparePass::lowerComplexDivOp(cir::ComplexDivOp op) {
   mlir::Value rhsReal = builder.createComplexReal(loc, rhs);
   mlir::Value rhsImag = builder.createComplexImag(loc, rhs);
 
-  mlir::Value loweredResult = lowerComplexDiv(*this, builder, loc, op, lhsReal,
-                                              lhsImag, rhsReal, rhsImag);
+  mlir::Value loweredResult =
+      lowerComplexDiv(*this, builder, loc, op, lhsReal, lhsImag, rhsReal,
+                      rhsImag, getContext(), *astCtx);
   op.replaceAllUsesWith(loweredResult);
   op.erase();
 }
diff --git a/clang/test/CIR/CodeGen/complex-mul-div.cpp b/clang/test/CIR/CodeGen/complex-mul-div.cpp
@@ -476,25 +476,21 @@ void foo3() {
 // OGCG-IMPROVED:  store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4
 // OGCG-IMPROVED:  store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4
 
-// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.double>
+// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.div {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.float>
 
 // LLVM-PROMOTED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
 // LLVM-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
 // LLVM-PROMOTED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4
 // LLVM-PROMOTED: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM-PROMOTED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
 // LLVM-PROMOTED: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
 // LLVM-PROMOTED: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
-// LLVM-PROMOTED: %[[A_REAL_F64:.*]] = fpext float %[[A_REAL]] to double
-// LLVM-PROMOTED: %[[A_IMAG_F64:.*]] = fpext float %[[A_IMAG]] to double
-// LLVM-PROMOTED: %[[TMP_A_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[A_REAL_F64]], 0
-// LLVM-PROMOTED: %[[A_CF64:.*]] = insertvalue { double, double } %[[TMP_A_CF64]], double %[[A_IMAG_F64]], 1
-// LLVM-PROMOTED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
 // LLVM-PROMOTED: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
 // LLVM-PROMOTED: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1
+// LLVM-PROMOTED: %[[A_REAL_F64:.*]] = fpext float %[[A_REAL]] to double
+// LLVM-PROMOTED: %[[A_IMAG_F64:.*]] = fpext float %[[A_IMAG]] to double
 // LLVM-PROMOTED: %[[B_REAL_F64:.*]] = fpext float %[[B_REAL]] to double
 // LLVM-PROMOTED: %[[B_IMAG_F64:.*]] = fpext float %[[B_IMAG]] to double
-// LLVM-PROMOTED: %[[TMP_B_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[B_REAL_F64]], 0
-// LLVM-PROMOTED: %[[B_CF64:.*]] = insertvalue { double, double } %[[TMP_B_CF64]], double %[[B_IMAG_F64]], 1
 // LLVM-PROMOTED: %[[MUL_AR_BR:.*]] = fmul double %[[A_REAL_F64]], %[[B_REAL_F64]]
 // LLVM-PROMOTED: %[[MUL_AI_BI:.*]] = fmul double %[[A_IMAG_F64]], %[[B_IMAG_F64]]
 // LLVM-PROMOTED: %[[MUL_BR_BR:.*]] = fmul double %[[B_REAL_F64]], %[[B_REAL_F64]]
@@ -503,16 +499,16 @@ void foo3() {
 // LLVM-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = fadd double %[[MUL_BR_BR]], %[[MUL_BI_BI]]
 // LLVM-PROMOTED: %[[RESULT_REAL:.*]] = fdiv double %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]]
 // LLVM-PROMOTED: %[[MUL_AI_BR:.*]] = fmul double %[[A_IMAG_F64]], %[[B_REAL_F64]]
-// LLVM-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]]
-// LLVM-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]]
-// LLVM-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %23
-// LLVM-PROMOTED: %[[TMP_RESULT_CF64:.*]] = insertvalue { double, double } {{.*}}, double %[[RESULT_REAL]], 0
-// LLVM-PROMOTED: %[[RESULT_CF64:.*]] = insertvalue { double, double } %[[TMP_RESULT_CF64]], double %[[RESULT_IMAG]], 1
+// LLVM-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]]
+// LLVM-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]]
+// LLVM-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %[[ADD_BRBR_BIBI]]
+// LLVM-PROMOTED: %[[TMP_RESULT_F64:.*]] = insertvalue { double, double } {{.*}}, double %[[RESULT_REAL]], 0
+// LLVM-PROMOTED: %[[RESULT_F64:.*]] = insertvalue { double, double } %[[TMP_RESULT_F64]], double %[[RESULT_IMAG]], 1
 // LLVM-PROMOTED: %[[RESULT_REAL_F32:.*]] = fptrunc double %[[RESULT_REAL]] to float
 // LLVM-PROMOTED: %[[RESULT_IMAG_F32:.*]] = fptrunc double %[[RESULT_IMAG]] to float
-// LLVM-PROMOTED: %[[TMP_RESULT_CF32:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL_F32]], 0
-// LLVM-PROMOTED: %[[RESULT_CF32:.*]] = insertvalue { float, float } %[[TMP_RESULT_CF32]], float %[[RESULT_IMAG_F32]], 1
-// LLVM-PROMOTED: store { float, float } %[[RESULT_CF32]], ptr %[[C_ADDR]], align 4
+// LLVM-PROMOTED: %[[TMP_RESULT_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL_F32]], 0
+// LLVM-PROMOTED: %[[RESULT_F32:.*]] = insertvalue { float, float } %[[TMP_RESULT_F32]], float %[[RESULT_IMAG_F32]], 1
+// LLVM-PROMOTED: store { float, float } %[[RESULT_F32]], ptr %[[C_ADDR]], align 4
 
 // OGCG-PROMOTED:  %[[A_ADDR:.*]] = alloca { float, float }, align 4
 // OGCG-PROMOTED: %[[B_ADDR:.*]] = alloca { float, float }, align 4
@@ -537,9 +533,9 @@ void foo3() {
 // OGCG-PROMOTED: %[[ADD_BRBR_BIBI:.*]] = fadd double %[[MUL_BR_BR]], %[[MUL_BI_BI]]
 // OGCG-PROMOTED: %[[MUL_AI_BR:.*]] = fmul double %[[A_IMAG_F64]], %[[B_REAL_F64]]
 // OGCG-PROMOTED: %[[MUL_AR_BI:.*]] = fmul double %[[A_REAL_F64]], %[[B_IMAG_F64]]
-// OGCG-PROMOTED: %[[SUB_AIBR_BRBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]]
+// OGCG-PROMOTED: %[[SUB_AIBR_ARBI:.*]] = fsub double %[[MUL_AI_BR]], %[[MUL_AR_BI]]
 // OGCG-PROMOTED: %[[RESULT_REAL:.*]] = fdiv double %[[ADD_ARBR_AIBI]], %[[ADD_BRBR_BIBI]]
-// OGCG-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_BRBI]], %[[ADD_BRBR_BIBI]]
+// OGCG-PROMOTED: %[[RESULT_IMAG:.*]] = fdiv double %[[SUB_AIBR_ARBI]], %[[ADD_BRBR_BIBI]]
 // OGCG-PROMOTED: %[[UNPROMOTION_RESULT_REAL:.*]] = fptrunc double %[[RESULT_REAL]] to float
 // OGCG-PROMOTED: %[[UNPROMOTION_RESULT_IMAG:.*]] = fptrunc double %[[RESULT_IMAG]] to float
 // OGCG-PROMOTED: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0