Skip to content

Commit ba58425

Browse files
[CIR][X86] Add support for vpcom builtins
Adds support for the `__builtin_ia32_vpcom` and `__builtin_ia32_vpcomu` builtins. Signed-off-by: vishruth-thimmaiah <[email protected]>
1 parent 49a9787 commit ba58425

File tree

2 files changed

+279
-0
lines changed

2 files changed

+279
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,62 @@ static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
168168
return cir::VecInsertOp::create(builder, loc, vec, value, indexVal);
169169
}
170170

171+
/// Lower the XOP packed-compare builtins (`__builtin_ia32_vpcom*` and
/// `__builtin_ia32_vpcomu*`) to CIR.
///
/// \param builder  Builder used to create the resulting operations.
/// \param loc      Location used for the emitted constants and the compare.
/// \param ops      Builtin operands: ops[0] and ops[1] are the vectors being
///                 compared, ops[2] is the predicate immediate. Only the low
///                 three bits of the immediate are used.
/// \param isSigned True for the signed (vpcom) forms. False for the unsigned
///                 (vpcomu) forms, whose operands are bitcast to vectors of
///                 unsigned integers of the same width before comparing.
/// \returns The vector compare result for predicates 0-5, a null (all-zero)
///          vector for predicate 6, or an all-ones splat for predicate 7.
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                llvm::ArrayRef<mlir::Value> ops,
                                bool isSigned) {
  assert(ops.size() >= 3 &&
         "vpcom builtins take two vectors and a predicate immediate");

  mlir::Value op0 = ops[0];
  mlir::Value op1 = ops[1];

  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
  mlir::Type elementTy = ty.getElementType();

  // The predicate has to be a constant; check the defining op explicitly so a
  // non-constant operand trips an assertion instead of using a null op handle.
  auto immOp = ops[2].getDefiningOp<cir::ConstantOp>();
  assert(immOp && "vpcom predicate operand must be a constant");
  const uint64_t imm = immOp.getIntValue().getZExtValue() & 0x7;

  cir::CmpOpKind pred;
  switch (imm) {
  case 0x0:
    pred = cir::CmpOpKind::lt;
    break;
  case 0x1:
    pred = cir::CmpOpKind::le;
    break;
  case 0x2:
    pred = cir::CmpOpKind::gt;
    break;
  case 0x3:
    pred = cir::CmpOpKind::ge;
    break;
  case 0x4:
    pred = cir::CmpOpKind::eq;
    break;
  case 0x5:
    pred = cir::CmpOpKind::ne;
    break;
  case 0x6:
    // Always-false predicate: no compare is emitted, the result is all zeros.
    return builder.getNullValue(ty, loc); // FALSE
  case 0x7: {
    // Always-true predicate: every element has all of its bits set.
    llvm::APInt allOnes =
        llvm::APInt::getAllOnes(cast<cir::IntType>(elementTy).getWidth());
    return cir::VecSplatOp::create(
        builder, loc, ty,
        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
  }
  default:
    // imm is masked to three bits above, so every value is handled.
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  if (!isSigned) {
    // Reinterpret both operands as unsigned vectors of the same element width
    // and element count so that the compare below is an unsigned one.
    elementTy = builder.getUIntNTy(cast<cir::IntType>(elementTy).getWidth());
    ty = cir::VectorType::get(elementTy, ty.getSize());
    op0 = builder.createBitcast(op0, ty);
    op1 = builder.createBitcast(op1, ty);
  }

  return builder.createVecCompare(loc, pred, op0, op1);
}
226+
171227
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
172228
const CallExpr *expr) {
173229
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -900,14 +956,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
900956
case X86::BI__builtin_ia32_ucmpq128_mask:
901957
case X86::BI__builtin_ia32_ucmpq256_mask:
902958
case X86::BI__builtin_ia32_ucmpq512_mask:
959+
cgm.errorNYI(expr->getSourceRange(),
960+
std::string("unimplemented X86 builtin call: ") +
961+
getContext().BuiltinInfo.getName(builtinID));
962+
return {};
903963
case X86::BI__builtin_ia32_vpcomb:
904964
case X86::BI__builtin_ia32_vpcomw:
905965
case X86::BI__builtin_ia32_vpcomd:
906966
case X86::BI__builtin_ia32_vpcomq:
967+
return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
907968
case X86::BI__builtin_ia32_vpcomub:
908969
case X86::BI__builtin_ia32_vpcomuw:
909970
case X86::BI__builtin_ia32_vpcomud:
910971
case X86::BI__builtin_ia32_vpcomuq:
972+
return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
911973
case X86::BI__builtin_ia32_kortestcqi:
912974
case X86::BI__builtin_ia32_kortestchi:
913975
case X86::BI__builtin_ia32_kortestcsi:
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-cir -o %t.cir -Wall -Werror
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-llvm -o %t.ll -Wall -Werror
4+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-cir -o %t.cir -Wall -Werror
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -fclangir -emit-llvm -o %t.ll -Wall -Werror
9+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
12+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
13+
14+
// This test mirrors clang/test/CodeGen/X86/xop-builtins.c, which CIR is
15+
// eventually expected to support in full.
16+
17+
#include <x86intrin.h>
18+
19+
// Exercises _mm_com_epu8 with predicate immediate 0. The checks below match
// bitcasts of both operands from signed to unsigned 8-bit element vectors in
// CIR followed by an `lt` vector compare, and an `icmp ult` in both LLVM IR
// outputs whose result is sign-extended and bitcast to <2 x i64>.
__m128i test_mm_com_epu8(__m128i a, __m128i b) {
20+
// CIR-LABEL: test_mm_com_epu8
21+
// CIR: %[[A:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
22+
// CIR: %[[B:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
23+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[A]], %[[B]]) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
24+
// CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
25+
26+
// LLVM-LABEL: test_mm_com_epu8
27+
// LLVM: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
28+
// LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
29+
// LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
30+
31+
// OGCG-LABEL: test_mm_com_epu8
32+
// OGCG: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
33+
// OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
34+
// OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
35+
return _mm_com_epu8(a, b, 0);
36+
}
37+
38+
// Exercises _mm_com_epu16 with predicate immediate 0. The checks below match
// bitcasts of both operands from signed to unsigned 16-bit element vectors in
// CIR followed by an `lt` vector compare, and an `icmp ult` in both LLVM IR
// outputs whose result is sign-extended and bitcast to <2 x i64>.
__m128i test_mm_com_epu16(__m128i a, __m128i b) {
39+
// CIR-LABEL: test_mm_com_epu16
40+
// CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
41+
// CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
42+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
43+
// CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
44+
45+
// LLVM-LABEL: test_mm_com_epu16
46+
// LLVM: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
47+
// LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
48+
// LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
49+
50+
// OGCG-LABEL: test_mm_com_epu16
51+
// OGCG: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
52+
// OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
53+
// OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
54+
return _mm_com_epu16(a, b, 0);
55+
}
56+
57+
// Exercises _mm_com_epu32 with predicate immediate 0. The checks below match
// bitcasts of both operands from signed to unsigned 32-bit element vectors in
// CIR followed by an `lt` vector compare, and an `icmp ult` in both LLVM IR
// outputs whose result is sign-extended and bitcast to <2 x i64>.
__m128i test_mm_com_epu32(__m128i a, __m128i b) {
58+
// CIR-LABEL: test_mm_com_epu32
59+
// CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
60+
// CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
61+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
62+
// CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
63+
64+
// LLVM-LABEL: test_mm_com_epu32
65+
// LLVM: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
66+
// LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
67+
// LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
68+
69+
// OGCG-LABEL: test_mm_com_epu32
70+
// OGCG: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
71+
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
72+
// OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
73+
return _mm_com_epu32(a, b, 0);
74+
}
75+
76+
// Exercises _mm_com_epu64 with predicate immediate 0. The checks below match
// bitcasts of both operands from signed to unsigned 64-bit element vectors in
// CIR followed by an `lt` vector compare, and an `icmp ult` in both LLVM IR
// outputs whose result is sign-extended to <2 x i64>. Unlike the narrower
// element widths, no trailing bitcast to <2 x i64> is checked here.
__m128i test_mm_com_epu64(__m128i a, __m128i b) {
77+
// CIR-LABEL: test_mm_com_epu64
78+
// CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
79+
// CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
80+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>
81+
82+
// LLVM-LABEL: test_mm_com_epu64
83+
// LLVM: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
84+
// LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
85+
86+
// OGCG-LABEL: test_mm_com_epu64
87+
// OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
88+
// OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
89+
return _mm_com_epu64(a, b, 0);
90+
}
91+
92+
// Exercises _mm_com_epi8 with predicate immediate 0. The checks below match
// an `lt` vector compare on signed 8-bit element vectors in CIR, and an
// `icmp slt` in both LLVM IR outputs whose result is sign-extended and
// bitcast to <2 x i64>.
__m128i test_mm_com_epi8(__m128i a, __m128i b) {
93+
// CIR-LABEL: test_mm_com_epi8
94+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>
95+
// CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>
96+
97+
// LLVM-LABEL: test_mm_com_epi8
98+
// LLVM: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
99+
// LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
100+
// LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
101+
102+
// OGCG-LABEL: test_mm_com_epi8
103+
// OGCG: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
104+
// OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
105+
// OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
106+
return _mm_com_epi8(a, b, 0);
107+
}
108+
109+
// Exercises _mm_com_epi16 with predicate immediate 0. The checks below match
// an `lt` vector compare on signed 16-bit element vectors in CIR, and an
// `icmp slt` in both LLVM IR outputs whose result is sign-extended and
// bitcast to <2 x i64>.
__m128i test_mm_com_epi16(__m128i a, __m128i b) {
110+
// CIR-LABEL: test_mm_com_epi16
111+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>
112+
// CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>
113+
114+
// LLVM-LABEL: test_mm_com_epi16
115+
// LLVM: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
116+
// LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
117+
// LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
118+
119+
// OGCG-LABEL: test_mm_com_epi16
120+
// OGCG: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
121+
// OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
122+
// OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
123+
return _mm_com_epi16(a, b, 0);
124+
}
125+
126+
// Exercises _mm_com_epi32 with predicate immediate 0. The checks below match
// an `lt` vector compare on signed 32-bit element vectors in CIR, and an
// `icmp slt` in both LLVM IR outputs whose result is sign-extended and
// bitcast to <2 x i64>.
__m128i test_mm_com_epi32(__m128i a, __m128i b) {
127+
// CIR-LABEL: test_mm_com_epi32
128+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
129+
// CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
130+
131+
// LLVM-LABEL: test_mm_com_epi32
132+
// LLVM: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
133+
// LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
134+
// LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
135+
136+
// OGCG-LABEL: test_mm_com_epi32
137+
// OGCG: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
138+
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
139+
// OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
140+
return _mm_com_epi32(a, b, 0);
141+
}
142+
143+
// Exercises _mm_com_epi64 with predicate immediate 0. The checks below match
// an `lt` vector compare on signed 64-bit element vectors in CIR, and an
// `icmp slt` in both LLVM IR outputs whose result is sign-extended to
// <2 x i64>. No trailing bitcast is checked for this element width.
__m128i test_mm_com_epi64(__m128i a, __m128i b) {
144+
// CIR-LABEL: test_mm_com_epi64
145+
// CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>
146+
147+
// LLVM-LABEL: test_mm_com_epi64
148+
// LLVM: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
149+
// LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
150+
151+
// OGCG-LABEL: test_mm_com_epi64
152+
// OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
153+
// OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
154+
return _mm_com_epi64(a, b, 0);
155+
}
156+
157+
// Exercises _mm_com_epi32 with predicate immediate 6. The checks below match
// a zero vector constant in CIR that is bitcast to the return type, a
// zeroinitializer value stored, reloaded and returned in the LLVM IR produced
// through ClangIR, and a direct return of zeroinitializer in the LLVM IR
// produced without -fclangir.
__m128i test_mm_com_epi32_false(__m128i a, __m128i b) {
158+
// CIR-LABEL: test_mm_com_epi32_false
159+
// CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
160+
// CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
161+
162+
// LLVM-LABEL: test_mm_com_epi32_false
163+
// LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
164+
// LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
165+
// LLVM: ret <2 x i64> %[[ZERO]]
166+
167+
// OGCG-LABEL: test_mm_com_epi32_false
168+
// OGCG: ret <2 x i64> zeroinitializer
169+
return _mm_com_epi32(a, b, 6);
170+
}
171+
172+
// Exercises _mm_com_epu32 with predicate immediate 6. The checks below match
// a zero vector constant (with signed 32-bit elements) in CIR that is bitcast
// to the return type, a zeroinitializer value stored, reloaded and returned
// in the LLVM IR produced through ClangIR, and a direct return of
// zeroinitializer in the LLVM IR produced without -fclangir.
__m128i test_mm_com_epu32_false(__m128i a, __m128i b) {
173+
// CIR-LABEL: test_mm_com_epu32_false
174+
// CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
175+
// CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
176+
177+
// LLVM-LABEL: test_mm_com_epu32_false
178+
// LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
179+
// LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
180+
// LLVM: ret <2 x i64> %[[ZERO]]
181+
182+
// OGCG-LABEL: test_mm_com_epu32_false
183+
// OGCG: ret <2 x i64> zeroinitializer
184+
return _mm_com_epu32(a, b, 6);
185+
}
186+
187+
// Exercises _mm_com_epi32 with predicate immediate 7. The checks below match
// a -1 constant splatted across a signed 32-bit element vector in CIR and
// bitcast to the return type, a splat of i64 -1 stored, reloaded and returned
// in the LLVM IR produced through ClangIR, and a direct return of that splat
// in the LLVM IR produced without -fclangir.
__m128i test_mm_com_epi32_true(__m128i a, __m128i b) {
188+
// CIR-LABEL: test_mm_com_epi32_true
189+
// CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
190+
// CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
191+
// CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
192+
193+
// LLVM-LABEL: test_mm_com_epi32_true
194+
// LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
195+
// LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
196+
// LLVM: ret <2 x i64> %[[SPLAT]]
197+
198+
// OGCG-LABEL: test_mm_com_epi32_true
199+
// OGCG: ret <2 x i64> splat (i64 -1)
200+
return _mm_com_epi32(a, b, 7);
201+
}
202+
203+
// Exercises _mm_com_epu32 with predicate immediate 7. The checks below match
// a -1 constant splatted across a signed 32-bit element vector in CIR and
// bitcast to the return type, a splat of i64 -1 stored, reloaded and returned
// in the LLVM IR produced through ClangIR, and a direct return of that splat
// in the LLVM IR produced without -fclangir.
__m128i test_mm_com_epu32_true(__m128i a, __m128i b) {
204+
// CIR-LABEL: test_mm_com_epu32_true
205+
// CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
206+
// CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
207+
// CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>
208+
209+
// LLVM-LABEL: test_mm_com_epu32_true
210+
// LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
211+
// LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
212+
// LLVM: ret <2 x i64> %[[SPLAT]]
213+
214+
// OGCG-LABEL: test_mm_com_epu32_true
215+
// OGCG: ret <2 x i64> splat (i64 -1)
216+
return _mm_com_epu32(a, b, 7);
217+
}

0 commit comments

Comments
 (0)