Skip to content

Commit 7354533

Browse files
woruyuliuzhenya and liuzhenya authored
[CIR] X86 vector fcmp-sse vector builtins (#167125)
### Summary This PR resolves #163895. Just add fcmp-sse part of X86 vector builtins for CIR. --------- Co-authored-by: liuzhenya <[email protected]>
1 parent 485b3af commit 7354533

File tree

4 files changed

+290
-15
lines changed

4 files changed

+290
-15
lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
131131
return cir::IntType::get(getContext(), n, false);
132132
}
133133

134+
/// Return the bit width of a CIR integer or floating-point element type.
/// Any other type aborts: callers are expected to pass only vector element
/// types known to be integer-or-float.
static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
  if (auto intTy = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
    return intTy.getWidth();
  if (auto fpTy = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
    return fpTy.getWidth();
  llvm_unreachable("Unsupported type in getCIRIntOrFloatBitWidth");
}
134142
cir::IntType getSIntNTy(int n) {
135143
return cir::IntType::get(getContext(), n, true);
136144
}
@@ -565,6 +573,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
565573
return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
566574
}
567575

576+
/// Build a vector comparison. The result type is a vector of signed
/// integers with the same element count as the operands and an element
/// width equal to the operands' element width.
cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
                               mlir::Value lhs, mlir::Value rhs) {
  auto operandVecTy = mlir::cast<VectorType>(lhs.getType());
  unsigned eltWidth = getCIRIntOrFloatBitWidth(operandVecTy.getElementType());
  VectorType resultVecTy =
      VectorType::get(context, getSIntNTy(eltWidth), operandVecTy.getSize());
  return cir::VecCmpOp::create(*this, loc, resultVecTy, kind, lhs, rhs);
}
585+
568586
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
569587
return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
570588
}

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ struct MissingFeatures {
259259
static bool emitBranchThroughCleanup() { return false; }
260260
static bool emitCheckedInBoundsGEP() { return false; }
261261
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
262+
static bool emitConstrainedFPCall() { return false; } // TODO: constrained FP call emission not yet implemented in CIR
262263
static bool emitLifetimeMarkers() { return false; }
263264
static bool emitLValueAlignmentAssumption() { return false; }
264265
static bool emitNullCheckForDeleteCalls() { return false; }

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,53 @@ static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
3333
.getResult();
3434
}
3535

36+
// OG has unordered comparison as a form of optimization in addition to
37+
// ordered comparison, while CIR doesn't.
38+
//
39+
// This means that we can't encode the comparison code of UGT (unordered
40+
// greater than), at least not at the CIR level.
41+
//
42+
// The boolean shouldInvert compensates for this.
43+
// For example: to get to the comparison code UGT, we pass in
44+
// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT.
45+
46+
// There are several ways to support this otherwise:
47+
// - register extra CmpOpKind for unordered comparison types and build the
48+
// translation code for
49+
// to go from CIR -> LLVM dialect. Notice we get this naturally with
50+
// shouldInvert, benefiting from existing infrastructure, albeit having to
51+
// generate an extra `not` at CIR).
52+
// - Just add extra comparison code to a new VecCmpOpKind instead of
53+
// cluttering CmpOpKind.
54+
// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
55+
// comparison
56+
// - Just emit the intrinsics call instead of calling this helper, see how the
57+
// LLVM lowering handles this.
58+
static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
59+
llvm::SmallVector<mlir::Value> &ops,
60+
mlir::Location loc, cir::CmpOpKind pred,
61+
bool shouldInvert) {
62+
assert(!cir::MissingFeatures::cgFPOptionsRAII());
63+
// TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
64+
assert(!cir::MissingFeatures::emitConstrainedFPCall());
65+
mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
66+
mlir::Value bitCast = builder.createBitcast(
67+
shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
68+
return bitCast;
69+
}
70+
3671
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
37-
const CallExpr *e) {
72+
const CallExpr *expr) {
3873
if (builtinID == Builtin::BI__builtin_cpu_is) {
39-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
74+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
4075
return {};
4176
}
4277
if (builtinID == Builtin::BI__builtin_cpu_supports) {
43-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
78+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
4479
return {};
4580
}
4681
if (builtinID == Builtin::BI__builtin_cpu_init) {
47-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
82+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
4883
return {};
4984
}
5085

@@ -65,7 +100,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
65100
getContext().GetBuiltinType(builtinID, error, &iceArguments);
66101
assert(error == ASTContext::GE_None && "Error while getting builtin type.");
67102

68-
for (auto [idx, arg] : llvm::enumerate(e->arguments()))
103+
for (auto [idx, arg] : llvm::enumerate(expr->arguments()))
69104
ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
70105

71106
CIRGenBuilderTy &builder = getBuilder();
@@ -75,15 +110,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
75110
default:
76111
return {};
77112
case X86::BI_mm_clflush:
78-
return emitIntrinsicCallOp(*this, e, "x86.sse2.clflush", voidTy, ops[0]);
113+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.clflush", voidTy, ops[0]);
79114
case X86::BI_mm_lfence:
80-
return emitIntrinsicCallOp(*this, e, "x86.sse2.lfence", voidTy);
115+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.lfence", voidTy);
81116
case X86::BI_mm_pause:
82-
return emitIntrinsicCallOp(*this, e, "x86.sse2.pause", voidTy);
117+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.pause", voidTy);
83118
case X86::BI_mm_mfence:
84-
return emitIntrinsicCallOp(*this, e, "x86.sse2.mfence", voidTy);
119+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.mfence", voidTy);
85120
case X86::BI_mm_sfence:
86-
return emitIntrinsicCallOp(*this, e, "x86.sse.sfence", voidTy);
121+
return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
87122
case X86::BI_mm_prefetch:
88123
case X86::BI__rdtsc:
89124
case X86::BI__builtin_ia32_rdtscp:
@@ -96,7 +131,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
96131
case X86::BI__builtin_ia32_undef128:
97132
case X86::BI__builtin_ia32_undef256:
98133
case X86::BI__builtin_ia32_undef512:
99-
cgm.errorNYI(e->getSourceRange(),
134+
cgm.errorNYI(expr->getSourceRange(),
100135
std::string("unimplemented X86 builtin call: ") +
101136
getContext().BuiltinInfo.getName(builtinID));
102137
return {};
@@ -118,12 +153,12 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
118153
index &= numElts - 1;
119154

120155
cir::ConstantOp indexVal =
121-
builder.getUInt64(index, getLoc(e->getExprLoc()));
156+
builder.getUInt64(index, getLoc(expr->getExprLoc()));
122157

123158
// These builtins exist so we can ensure the index is an ICE and in range.
124159
// Otherwise we could just do this in the header file.
125-
return cir::VecExtractOp::create(builder, getLoc(e->getExprLoc()), ops[0],
126-
indexVal);
160+
return cir::VecExtractOp::create(builder, getLoc(expr->getExprLoc()),
161+
ops[0], indexVal);
127162
}
128163
case X86::BI__builtin_ia32_vec_set_v4hi:
129164
case X86::BI__builtin_ia32_vec_set_v16qi:
@@ -758,10 +793,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
758793
case X86::BI__builtin_ia32_cmpunordpd:
759794
case X86::BI__builtin_ia32_cmpneqps:
760795
case X86::BI__builtin_ia32_cmpneqpd:
796+
cgm.errorNYI(expr->getSourceRange(),
797+
std::string("unimplemented X86 builtin call: ") +
798+
getContext().BuiltinInfo.getName(builtinID));
799+
return {};
761800
case X86::BI__builtin_ia32_cmpnltps:
762801
case X86::BI__builtin_ia32_cmpnltpd:
802+
return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
803+
cir::CmpOpKind::lt, /*shouldInvert=*/true);
763804
case X86::BI__builtin_ia32_cmpnleps:
764805
case X86::BI__builtin_ia32_cmpnlepd:
806+
return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
807+
cir::CmpOpKind::le, /*shouldInvert=*/true);
765808
case X86::BI__builtin_ia32_cmpordps:
766809
case X86::BI__builtin_ia32_cmpordpd:
767810
case X86::BI__builtin_ia32_cmpph128_mask:
@@ -846,7 +889,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
846889
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
847890
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
848891
case X86::BI__builtin_ia32_prefetchi:
849-
cgm.errorNYI(e->getSourceRange(),
892+
cgm.errorNYI(expr->getSourceRange(),
850893
std::string("unimplemented X86 builtin call: ") +
851894
getContext().BuiltinInfo.getName(builtinID));
852895
return {};

0 commit comments

Comments
 (0)