Skip to content

Commit a65aab4

Browse files
[CIR][X86] Add support for vpcom builtins
Adds support for the `__builtin_ia32_vpcom{b,w,d,q}` and `__builtin_ia32_vpcomu{b,w,d,q}` builtins.

Signed-off-by: vishruth-thimmaiah <[email protected]>
1 parent a77c494 commit a65aab4

File tree

2 files changed

+262
-0
lines changed

2 files changed

+262
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,62 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
303303
return builder.createMul(loc, lhs, rhs);
304304
}
305305

306+
// Emit CIR for the XOP vpcom/vpcomu builtin family
// (__builtin_ia32_vpcom{b,w,d,q} and the unsigned vpcomu variants).
//
// ops[0] and ops[1] are the vector operands; ops[2] is the immediate
// predicate, of which only the low three bits select the comparison:
//   0=lt, 1=le, 2=gt, 3=ge, 4=eq, 5=ne, 6=always-false, 7=always-true.
// For the unsigned variants (isSigned == false) both operands are bitcast
// to an unsigned element type so the emitted cir.vec.cmp compares unsigned.
//
// NOTE: ops is taken as ArrayRef rather than SmallVector by value — the
// previous signature copied the whole operand vector on every call
// (clang-tidy performance-unnecessary-value-param); SmallVector converts
// implicitly, so call sites are unchanged.
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                llvm::ArrayRef<mlir::Value> ops,
                                bool isSigned) {
  mlir::Value op0 = ops[0];
  mlir::Value op1 = ops[1];

  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
  mlir::Type elementTy = ty.getElementType();

  // The predicate operand is a constant; mask to 3 bits since the upper
  // immediate bits are ignored.
  uint64_t imm =
      ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() &
      0x7;

  cir::CmpOpKind pred;
  switch (imm) {
  case 0x0:
    pred = cir::CmpOpKind::lt;
    break;
  case 0x1:
    pred = cir::CmpOpKind::le;
    break;
  case 0x2:
    pred = cir::CmpOpKind::gt;
    break;
  case 0x3:
    pred = cir::CmpOpKind::ge;
    break;
  case 0x4:
    pred = cir::CmpOpKind::eq;
    break;
  case 0x5:
    pred = cir::CmpOpKind::ne;
    break;
  case 0x6:
    // Predicate 6 is "always false": the result is an all-zeros vector.
    return builder.getNullValue(ty, loc); // FALSE
  case 0x7: {
    // Predicate 7 is "always true": splat an all-ones element.
    llvm::APInt allOnes =
        llvm::APInt::getAllOnes(cast<cir::IntType>(elementTy).getWidth());
    return cir::VecSplatOp::create(
        builder, loc, ty,
        builder.getConstAPInt(loc, elementTy, allOnes)); // TRUE
  }
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  if (!isSigned) {
    // Unsigned variant: reinterpret both operands with unsigned elements so
    // cir.vec.cmp lowers to an unsigned comparison (icmp ult/ule/...).
    elementTy = builder.getUIntNTy(cast<cir::IntType>(elementTy).getWidth());
    ty = cir::VectorType::get(elementTy, ty.getSize());
    op0 = builder.createBitcast(op0, ty);
    op1 = builder.createBitcast(op1, ty);
  }

  return builder.createVecCompare(loc, pred, op0, op1);
}
361+
306362
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
307363
const CallExpr *expr) {
308364
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -1147,14 +1203,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
11471203
case X86::BI__builtin_ia32_ucmpq128_mask:
11481204
case X86::BI__builtin_ia32_ucmpq256_mask:
11491205
case X86::BI__builtin_ia32_ucmpq512_mask:
1206+
cgm.errorNYI(expr->getSourceRange(),
1207+
std::string("unimplemented X86 builtin call: ") +
1208+
getContext().BuiltinInfo.getName(builtinID));
1209+
return {};
11501210
case X86::BI__builtin_ia32_vpcomb:
11511211
case X86::BI__builtin_ia32_vpcomw:
11521212
case X86::BI__builtin_ia32_vpcomd:
11531213
case X86::BI__builtin_ia32_vpcomq:
1214+
return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
11541215
case X86::BI__builtin_ia32_vpcomub:
11551216
case X86::BI__builtin_ia32_vpcomuw:
11561217
case X86::BI__builtin_ia32_vpcomud:
11571218
case X86::BI__builtin_ia32_vpcomuq:
1219+
return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
11581220
case X86::BI__builtin_ia32_kortestcqi:
11591221
case X86::BI__builtin_ia32_kortestchi:
11601222
case X86::BI__builtin_ia32_kortestcsi:

clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,203 @@ __m128i test_mm_roti_epi64(__m128i a) {
9090
// OGCG: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x i64> %[[VAR]], <2 x i64> splat (i64 100))
9191
return _mm_roti_epi64(a, 100);
9292
}
93+
94+
__m128i test_mm_com_epu8(__m128i a, __m128i b) {
  // vpcomub with predicate 0 (lt): operands are bitcast to unsigned byte
  // vectors so CIR emits an unsigned compare (icmp ult after lowering).
  // CIR-LABEL: test_mm_com_epu8
  // CIR: %[[A:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
  // CIR: %[[B:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[A]], %[[B]]) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu8
  // LLVM: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu8
  // OGCG: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
  return _mm_com_epu8(a, b, 0);
}
112+
113+
__m128i test_mm_com_epu16(__m128i a, __m128i b) {
  // vpcomuw with predicate 0 (lt): unsigned 16-bit element compare.
  // CIR-LABEL: test_mm_com_epu16
  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu16
  // LLVM: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu16
  // OGCG: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
  return _mm_com_epu16(a, b, 0);
}
131+
132+
__m128i test_mm_com_epu32(__m128i a, __m128i b) {
  // vpcomud with predicate 0 (lt): unsigned 32-bit element compare.
  // CIR-LABEL: test_mm_com_epu32
  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu32
  // LLVM: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu32
  // OGCG: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
  return _mm_com_epu32(a, b, 0);
}
150+
151+
__m128i test_mm_com_epu64(__m128i a, __m128i b) {
  // vpcomuq with predicate 0 (lt): unsigned 64-bit element compare. The
  // result is already <2 x i64>, so no final bitcast is checked.
  // CIR-LABEL: test_mm_com_epu64
  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu64
  // LLVM: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu64
  // OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  return _mm_com_epu64(a, b, 0);
}
166+
167+
__m128i test_mm_com_epi8(__m128i a, __m128i b) {
  // vpcomb with predicate 0 (lt): signed compare, no operand bitcasts.
  // CIR-LABEL: test_mm_com_epi8
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi8
  // LLVM: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi8
  // OGCG: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
  return _mm_com_epi8(a, b, 0);
}
183+
184+
__m128i test_mm_com_epi16(__m128i a, __m128i b) {
  // vpcomw with predicate 0 (lt): signed 16-bit element compare.
  // CIR-LABEL: test_mm_com_epi16
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi16
  // LLVM: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi16
  // OGCG: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
  return _mm_com_epi16(a, b, 0);
}
200+
201+
__m128i test_mm_com_epi32(__m128i a, __m128i b) {
  // vpcomd with predicate 0 (lt): signed 32-bit element compare.
  // CIR-LABEL: test_mm_com_epi32
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi32
  // LLVM: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi32
  // OGCG: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
  return _mm_com_epi32(a, b, 0);
}
217+
218+
__m128i test_mm_com_epi64(__m128i a, __m128i b) {
  // vpcomq with predicate 0 (lt): signed 64-bit element compare; result is
  // already <2 x i64>, so no final bitcast is checked.
  // CIR-LABEL: test_mm_com_epi64
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi64
  // LLVM: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi64
  // OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  return _mm_com_epi64(a, b, 0);
}
231+
232+
__m128i test_mm_com_epi32_false(__m128i a, __m128i b) {
  // Predicate 6 is "always false": the builtin folds to a zero vector with
  // no comparison emitted.
  // CIR-LABEL: test_mm_com_epi32_false
  // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi32_false
  // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
  // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
  // LLVM: ret <2 x i64> %[[ZERO]]

  // OGCG-LABEL: test_mm_com_epi32_false
  // OGCG: ret <2 x i64> zeroinitializer
  return _mm_com_epi32(a, b, 6);
}
246+
247+
__m128i test_mm_com_epu32_false(__m128i a, __m128i b) {
  // Predicate 6 ("always false") for the unsigned variant: same zero-vector
  // fold; no unsigned bitcasts are needed since no compare is emitted.
  // CIR-LABEL: test_mm_com_epu32_false
  // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu32_false
  // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
  // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
  // LLVM: ret <2 x i64> %[[ZERO]]

  // OGCG-LABEL: test_mm_com_epu32_false
  // OGCG: ret <2 x i64> zeroinitializer
  return _mm_com_epu32(a, b, 6);
}
261+
262+
__m128i test_mm_com_epi32_true(__m128i a, __m128i b) {
  // Predicate 7 is "always true": the builtin folds to an all-ones splat
  // with no comparison emitted.
  // CIR-LABEL: test_mm_com_epi32_true
  // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
  // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi32_true
  // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
  // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
  // LLVM: ret <2 x i64> %[[SPLAT]]

  // OGCG-LABEL: test_mm_com_epi32_true
  // OGCG: ret <2 x i64> splat (i64 -1)
  return _mm_com_epi32(a, b, 7);
}
277+
278+
__m128i test_mm_com_epu32_true(__m128i a, __m128i b) {
  // Predicate 7 ("always true") for the unsigned variant: same all-ones
  // splat fold; no unsigned bitcasts since no compare is emitted.
  // CIR-LABEL: test_mm_com_epu32_true
  // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
  // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu32_true
  // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
  // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
  // LLVM: ret <2 x i64> %[[SPLAT]]

  // OGCG-LABEL: test_mm_com_epu32_true
  // OGCG: ret <2 x i64> splat (i64 -1)
  return _mm_com_epu32(a, b, 7);
}

0 commit comments

Comments
 (0)