Skip to content

Commit 7354533

Browse files
woruyuliuzhenya and liuzhenya authored
[CIR] X86 vector fcmp-sse vector builtins (#167125)
### Summary This PR resolves #163895. Just add fcmp-sse part of X86 vector builtins for CIR. --------- Co-authored-by: liuzhenya <[email protected]>
1 parent 485b3af commit 7354533

File tree

4 files changed

+290
-15
lines changed

4 files changed

+290
-15
lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
131131
return cir::IntType::get(getContext(), n, false);
132132
}
133133

134+
/// Return the bit width of a CIR integer or floating-point element type.
/// Any other type aborts: callers are expected to pass only vector element
/// types known to be integer-or-float.
static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
  if (auto intTy = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
    return intTy.getWidth();
  if (auto fpTy = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
    return fpTy.getWidth();
  llvm_unreachable("Unsupported type in getCIRIntOrFloatBitWidth");
}
134142
cir::IntType getSIntNTy(int n) {
135143
return cir::IntType::get(getContext(), n, true);
136144
}
@@ -565,6 +573,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
565573
return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
566574
}
567575

576+
/// Build a vector comparison. The result type is a vector of signed
/// integers with the same element count as the operands and an element
/// width equal to the operands' element width.
cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
                               mlir::Value lhs, mlir::Value rhs) {
  auto operandVecTy = mlir::cast<VectorType>(lhs.getType());
  unsigned eltWidth = getCIRIntOrFloatBitWidth(operandVecTy.getElementType());
  VectorType resultVecTy =
      VectorType::get(context, getSIntNTy(eltWidth), operandVecTy.getSize());
  return cir::VecCmpOp::create(*this, loc, resultVecTy, kind, lhs, rhs);
}
585+
568586
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
569587
return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
570588
}

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ struct MissingFeatures {
259259
static bool emitBranchThroughCleanup() { return false; }
260260
static bool emitCheckedInBoundsGEP() { return false; }
261261
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
262+
static bool emitConstrainedFPCall() { return false; } // TODO: constrained FP call emission not yet implemented in CIR
262263
static bool emitLifetimeMarkers() { return false; }
263264
static bool emitLValueAlignmentAssumption() { return false; }
264265
static bool emitNullCheckForDeleteCalls() { return false; }

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,53 @@ static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
3333
.getResult();
3434
}
3535

36+
// OG has unordered comparison as a form of optimization in addition to
37+
// ordered comparison, while CIR doesn't.
38+
//
39+
// This means that we can't encode the comparison code of UGT (unordered
40+
// greater than), at least not at the CIR level.
41+
//
42+
// The boolean shouldInvert compensates for this.
43+
// For example: to get to the comparison code UGT, we pass in
44+
// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT.
45+
46+
// There are several ways to support this otherwise:
47+
// - register extra CmpOpKind for unordered comparison types and build the
48+
// translation code for
49+
// to go from CIR -> LLVM dialect. Notice we get this naturally with
50+
// shouldInvert, benefiting from existing infrastructure, albeit having to
51+
// generate an extra `not` at CIR).
52+
// - Just add extra comparison code to a new VecCmpOpKind instead of
53+
// cluttering CmpOpKind.
54+
// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
55+
// comparison
56+
// - Just emit the intrinsics call instead of calling this helper, see how the
57+
// LLVM lowering handles this.
58+
static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
59+
llvm::SmallVector<mlir::Value> &ops,
60+
mlir::Location loc, cir::CmpOpKind pred,
61+
bool shouldInvert) {
62+
assert(!cir::MissingFeatures::cgFPOptionsRAII());
63+
// TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
64+
assert(!cir::MissingFeatures::emitConstrainedFPCall());
65+
mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
66+
mlir::Value bitCast = builder.createBitcast(
67+
shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
68+
return bitCast;
69+
}
70+
3671
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
37-
const CallExpr *e) {
72+
const CallExpr *expr) {
3873
if (builtinID == Builtin::BI__builtin_cpu_is) {
39-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
74+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
4075
return {};
4176
}
4277
if (builtinID == Builtin::BI__builtin_cpu_supports) {
43-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
78+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
4479
return {};
4580
}
4681
if (builtinID == Builtin::BI__builtin_cpu_init) {
47-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
82+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
4883
return {};
4984
}
5085

@@ -65,7 +100,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
65100
getContext().GetBuiltinType(builtinID, error, &iceArguments);
66101
assert(error == ASTContext::GE_None && "Error while getting builtin type.");
67102

68-
for (auto [idx, arg] : llvm::enumerate(e->arguments()))
103+
for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
69104
ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
70105

71106
CIRGenBuilderTy &builder = getBuilder();
@@ -75,15 +110,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
75110
default:
76111
return {};
77112
case X86::BI_mm_clflush:
78-
return emitIntrinsicCallOp(*this, e, "x86.sse2.clflush", voidTy, ops[0]);
113+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.clflush", voidTy, ops[0]);
79114
case X86::BI_mm_lfence:
80-
return emitIntrinsicCallOp(*this, e, "x86.sse2.lfence", voidTy);
115+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.lfence", voidTy);
81116
case X86::BI_mm_pause:
82-
return emitIntrinsicCallOp(*this, e, "x86.sse2.pause", voidTy);
117+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.pause", voidTy);
83118
case X86::BI_mm_mfence:
84-
return emitIntrinsicCallOp(*this, e, "x86.sse2.mfence", voidTy);
119+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.mfence", voidTy);
85120
case X86::BI_mm_sfence:
86-
return emitIntrinsicCallOp(*this, e, "x86.sse.sfence", voidTy);
121+
return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
87122
case X86::BI_mm_prefetch:
88123
case X86::BI__rdtsc:
89124
case X86::BI__builtin_ia32_rdtscp:
@@ -96,7 +131,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
96131
case X86::BI__builtin_ia32_undef128:
97132
case X86::BI__builtin_ia32_undef256:
98133
case X86::BI__builtin_ia32_undef512:
99-
cgm.errorNYI(e->getSourceRange(),
134+
cgm.errorNYI(expr->getSourceRange(),
100135
std::string("unimplemented X86 builtin call: ") +
101136
getContext().BuiltinInfo.getName(builtinID));
102137
return {};
@@ -118,12 +153,12 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
118153
index &= numElts - 1;
119154

120155
cir::ConstantOp indexVal =
121-
builder.getUInt64(index, getLoc(e->getExprLoc()));
156+
builder.getUInt64(index, getLoc(expr->getExprLoc()));
122157

123158
// These builtins exist so we can ensure the index is an ICE and in range.
124159
// Otherwise we could just do this in the header file.
125-
return cir::VecExtractOp::create(builder, getLoc(e->getExprLoc()), ops[0],
126-
indexVal);
160+
return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
161+
ops[0], indexVal);
127162
}
128163
case X86::BI__builtin_ia32_vec_set_v4hi:
129164
case X86::BI__builtin_ia32_vec_set_v16qi:
@@ -758,10 +793,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
758793
case X86::BI__builtin_ia32_cmpunordpd:
759794
case X86::BI__builtin_ia32_cmpneqps:
760795
case X86::BI__builtin_ia32_cmpneqpd:
796+
cgm.errorNYI(expr->getSourceRange(),
797+
std::string("unimplemented X86 builtin call: ") +
798+
getContext().BuiltinInfo.getName(builtinID));
799+
return {};
761800
case X86::BI__builtin_ia32_cmpnltps:
762801
case X86::BI__builtin_ia32_cmpnltpd:
802+
return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
803+
cir::CmpOpKind::lt, /*shouldInvert=*/true);
763804
case X86::BI__builtin_ia32_cmpnleps:
764805
case X86::BI__builtin_ia32_cmpnlepd:
806+
return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
807+
cir::CmpOpKind::le, /*shouldInvert=*/true);
765808
case X86::BI__builtin_ia32_cmpordps:
766809
case X86::BI__builtin_ia32_cmpordpd:
767810
case X86::BI__builtin_ia32_cmpph128_mask:
@@ -846,7 +889,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
846889
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
847890
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
848891
case X86::BI__builtin_ia32_prefetchi:
849-
cgm.errorNYI(e->getSourceRange(),
892+
cgm.errorNYI(expr->getSourceRange(),
850893
std::string("unimplemented X86 builtin call: ") +
851894
getContext().BuiltinInfo.getName(builtinID));
852895
return {};

0 commit comments

Comments
 (0)