Skip to content

Commit 9fc2a91

Browse files
authored
[CIR] Implement __builtin_ia32_cmpnleps/cmpnlepd (#1893)
Fixes #1818 - Implement the createVecCompare, getCIRIntOrFloatBitWidth and getVectorFCmpIR helpers for VecCmp op creation. - Add the clang/test/CIR/CodeGen/builtin-fcmp-sse.c test. In OG, there is a sext from bool to int before the bitcast to the float vector, since fcmp's result in LLVM IR is boolean-like, while VecCmpOp in CIR returns an integer vector whose lanes are 0 or -1. There is also a boolean `shouldInvert` on the CIR side, since CIR doesn't contain the optimized unordered comparisons. For example, OLE is the inverse predicate of UGT, so if we need UGT we have to pass in OLE with `shouldInvert = true`.
1 parent d8f3180 commit 9fc2a91

File tree

3 files changed

+113
-1
lines changed

3 files changed

+113
-1
lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
8989
return cir::IntType::get(getContext(), N, false);
9090
}
9191

92+
/// Returns the bit width of \p eltTy, which must implement either
/// cir::IntTypeInterface or cir::FPTypeInterface. Any other type is a
/// programming error and reaches llvm_unreachable.
static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
  // Integer-like CIR types report their width directly.
  auto asInt = mlir::dyn_cast<cir::IntTypeInterface>(eltTy);
  if (asInt)
    return asInt.getWidth();

  // Otherwise the type has to be a CIR floating-point type.
  auto asFloat = mlir::dyn_cast<cir::FPTypeInterface>(eltTy);
  if (!asFloat)
    llvm_unreachable("Wrong type passed in or Non-CIR type passed in");
  return asFloat.getWidth();
}
92100
/// Returns the CIR integer type that is \p N bits wide, created with the
/// trailing flag of cir::IntType::get set to true.
cir::IntType getSIntNTy(int N) {
  mlir::MLIRContext *ctx = getContext();
  return cir::IntType::get(ctx, N, true);
}
@@ -188,6 +196,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
188196
return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
189197
}
190198

199+
/// Creates a cir::VecCmpOp comparing \p lhs and \p rhs with predicate
/// \p kind.
///
/// The result type is a vector of signed integers that has the same size as
/// the vector type of \p lhs and whose element width equals the bit width
/// of the element type of \p lhs. \p lhs is cast to VectorType
/// unconditionally, so it must be a vector value.
cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
                               mlir::Value lhs, mlir::Value rhs) {
  auto operandVecTy = mlir::cast<VectorType>(lhs.getType());

  // One signed integer lane per operand lane, with matching bit width.
  unsigned laneBits = getCIRIntOrFloatBitWidth(operandVecTy.getElementType());
  cir::IntType laneTy = getSIntNTy(laneBits);
  auto resultVecTy = VectorType::get(context, laneTy, operandVecTy.getSize());

  return cir::VecCmpOp::create(*this, loc, resultVecTy, kind, lhs, rhs);
}
208+
191209
/// Emits a `ne` comparison of \p operand against itself and returns the
/// result.
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
  mlir::Value selfNotEqual =
      createCompare(loc, cir::CmpOpKind::ne, operand, operand);
  return selfNotEqual;
}

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,44 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
306306
Ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, E));
307307
}
308308

309+
// OG has unordered comparison as a form of optimization in addition to
310+
// ordered comparison, while CIR doesn't.
311+
//
312+
// This means that we can't encode the comparison code of UGT (unordered
313+
// greater than), at least not at the CIR level.
314+
//
315+
// The boolean shouldInvert compensates for this.
316+
// For example: to get to the comparison code UGT, we pass in
317+
// getVectorFCmpIR(OLE, shouldInvert = true) since OLE is the inverse of UGT.
318+
319+
// There are several ways to support this otherwise:
320+
// - register extra CmpOpKind for unordered comparison types and build the
321+
// translation code for
322+
// to go from CIR -> LLVM dialect. Notice we get this naturally with
323+
// shouldInvert, benefiting from existing infrastructure, albeit having to
324+
// generate an extra `not` at CIR).
325+
// - Just add extra comparison code to a new VecCmpOpKind instead of
326+
// cluttering CmpOpKind.
327+
// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
328+
// comparison
329+
// - Just emit the intrinsics call instead of calling this helper, see how the
330+
// LLVM lowering handles this.
331+
auto getVectorFCmpIR = [this, &Ops, &E](cir::CmpOpKind pred,
332+
bool shouldInvert, bool isSignaling) {
333+
assert(!cir::MissingFeatures::CGFPOptionsRAII());
334+
auto loc = getLoc(E->getExprLoc());
335+
mlir::Value cmp;
336+
if (builder.getIsFPConstrained())
337+
// TODO: Add isSignaling boolean once emitConstrainedFPCall implemented
338+
assert(cir::MissingFeatures::emitConstrainedFPCall());
339+
else
340+
cmp = builder.createVecCompare(loc, pred, Ops[0], Ops[1]);
341+
342+
mlir::Value bitCast = builder.createBitcast(
343+
shouldInvert ? builder.createNot(cmp) : cmp, Ops[0].getType());
344+
return bitCast;
345+
};
346+
309347
switch (BuiltinID) {
310348
default:
311349
return nullptr;
@@ -1705,7 +1743,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
17051743
llvm_unreachable("cmpnltps NYI");
17061744
case X86::BI__builtin_ia32_cmpnleps:
17071745
case X86::BI__builtin_ia32_cmpnlepd:
1708-
llvm_unreachable("cmpnleps NYI");
1746+
return getVectorFCmpIR(cir::CmpOpKind::le, /*shouldInvert=*/true,
1747+
/*isSignaling=*/true);
17091748
case X86::BI__builtin_ia32_cmpordps:
17101749
case X86::BI__builtin_ia32_cmpordpd:
17111750
llvm_unreachable("cmpordps NYI");
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
2+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OG
4+
5+
// 16-byte, 16-byte-aligned vector types used by the tests in this file:
// __m128 is a vector of float, __m128d a vector of double.
typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
6+
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
7+
8+
// Checks that __builtin_ia32_cmpnleps is emitted as a `le` vector compare
// followed by `not` and a bitcast in CIR, and as `fcmp ugt` + sext + bitcast
// in LLVM IR for both the ClangIR pipeline (LLVM) and classic codegen (OG).
__m128 test_cmpnleps(__m128 A, __m128 B) {

  // CIR-LABEL: @test_cmpnleps
  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
  // CIR-NEXT: [[CAST:%.*]] = cir.cast(bitcast, [[NOTCMP]] : !cir.vector<!s32i x 4>), !cir.vector<!cir.float x 4>
  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>

  // LLVM-LABEL: test_cmpnleps
  // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // LLVM-NEXT: ret <4 x float> [[CAST]]

  // OG-LABEL: test_cmpnleps
  // OG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
  // OG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // OG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // OG-NEXT: ret <4 x float> [[CAST]]
  return __builtin_ia32_cmpnleps(A, B);
}
31+
32+
33+
// Checks that __builtin_ia32_cmpnlepd is emitted as a `le` vector compare
// followed by `not` and a bitcast in CIR, and as `fcmp ugt` + sext + bitcast
// in LLVM IR for both the ClangIR pipeline (LLVM) and classic codegen (OG).
__m128d test_cmpnlepd(__m128d A, __m128d B) {
34+
35+
// CIR-LABEL: @test_cmpnlepd
36+
// CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
37+
// CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
38+
// CIR-NEXT: [[CAST:%.*]] = cir.cast(bitcast, [[NOTCMP]] : !cir.vector<!s64i x 2>), !cir.vector<!cir.double x 2>
39+
// CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
40+
// CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
41+
// CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
42+
43+
// LLVM-LABEL: test_cmpnlepd
44+
// LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
45+
// LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
46+
// LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
47+
// LLVM-NEXT: ret <2 x double> [[CAST]]
48+
49+
// OG-LABEL: test_cmpnlepd
50+
// OG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
51+
// OG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
52+
// OG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
53+
// OG-NEXT: ret <2 x double> [[CAST]]
54+
return __builtin_ia32_cmpnlepd(A, B);
55+
}

0 commit comments

Comments
 (0)