Skip to content

Commit 67307c8

Browse files
[CIR][X86] Add support for vpcom builtins (#170362)
Adds support for the `__builtin_ia32_vpcom` and `__builtin_ia32_vpcomu` X86 builtins. Part of #167765 --------- Signed-off-by: vishruth-thimmaiah <[email protected]>
1 parent 00c51ec commit 67307c8

File tree

2 files changed

+359
-4
lines changed

2 files changed

+359
-4
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,61 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
315315
return builder.createMul(loc, lhs, rhs);
316316
}
317317

318+
// Lower the XOP vpcom/vpcomu builtins (__builtin_ia32_vpcom{b,w,d,q} and the
// unsigned __builtin_ia32_vpcomu{b,w,d,q} variants) to a CIR vector compare.
//
// \p ops holds the two vector operands (ops[0], ops[1]) and the immediate
// comparison predicate (ops[2]). Only the low 3 bits of the immediate are
// significant, matching the hardware encoding:
//   0 -> lt, 1 -> le, 2 -> gt, 3 -> ge, 4 -> eq, 5 -> ne,
//   6 -> FALSE (all-zeros result), 7 -> TRUE (all-ones result).
// \p isSigned selects the signed (vpcom) vs unsigned (vpcomu) flavor; if it
// disagrees with the signedness of the operands' element type, both operands
// are bitcast so the emitted compare uses the requested signedness.
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                const llvm::SmallVector<mlir::Value> &ops,
                                bool isSigned) {
  mlir::Value op0 = ops[0];
  mlir::Value op1 = ops[1];

  cir::VectorType ty = cast<cir::VectorType>(op0.getType());
  cir::IntType elementTy = cast<cir::IntType>(ty.getElementType());

  // Only the low 3 bits of the immediate encode the predicate.
  uint64_t imm = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;

  cir::CmpOpKind pred;
  switch (imm) {
  case 0x0:
    pred = cir::CmpOpKind::lt;
    break;
  case 0x1:
    pred = cir::CmpOpKind::le;
    break;
  case 0x2:
    pred = cir::CmpOpKind::gt;
    break;
  case 0x3:
    pred = cir::CmpOpKind::ge;
    break;
  case 0x4:
    pred = cir::CmpOpKind::eq;
    break;
  case 0x5:
    pred = cir::CmpOpKind::ne;
    break;
  case 0x6:
    // FALSE: result is all zeros regardless of the operands.
    return builder.getNullValue(ty, loc);
  case 0x7: {
    // TRUE: splat an all-ones element across the result vector.
    llvm::APInt allOnes = llvm::APInt::getAllOnes(elementTy.getWidth());
    return cir::VecSplatOp::create(
        builder, loc, ty,
        builder.getConstAPInt(loc, elementTy, allOnes));
  }
  default:
    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
  }

  // If the builtin's signedness disagrees with the operands' element type,
  // bitcast both operands so the vector compare has the requested signedness.
  if ((!isSigned && elementTy.isSigned()) ||
      (isSigned && elementTy.isUnsigned())) {
    elementTy = elementTy.isSigned() ? builder.getUIntNTy(elementTy.getWidth())
                                     : builder.getSIntNTy(elementTy.getWidth());
    ty = cir::VectorType::get(elementTy, ty.getSize());
    op0 = builder.createBitcast(op0, ty);
    op1 = builder.createBitcast(op1, ty);
  }

  return builder.createVecCompare(loc, pred, op0, op1);
}
372+
318373
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
319374
const CallExpr *expr) {
320375
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -1159,18 +1214,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
11591214
case X86::BI__builtin_ia32_ucmpq128_mask:
11601215
case X86::BI__builtin_ia32_ucmpq256_mask:
11611216
case X86::BI__builtin_ia32_ucmpq512_mask:
1217+
cgm.errorNYI(expr->getSourceRange(),
1218+
std::string("unimplemented X86 builtin call: ") +
1219+
getContext().BuiltinInfo.getName(builtinID));
1220+
return {};
11621221
case X86::BI__builtin_ia32_vpcomb:
11631222
case X86::BI__builtin_ia32_vpcomw:
11641223
case X86::BI__builtin_ia32_vpcomd:
11651224
case X86::BI__builtin_ia32_vpcomq:
1225+
return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, true);
11661226
case X86::BI__builtin_ia32_vpcomub:
11671227
case X86::BI__builtin_ia32_vpcomuw:
11681228
case X86::BI__builtin_ia32_vpcomud:
11691229
case X86::BI__builtin_ia32_vpcomuq:
1170-
cgm.errorNYI(expr->getSourceRange(),
1171-
std::string("unimplemented X86 builtin call: ") +
1172-
getContext().BuiltinInfo.getName(builtinID));
1173-
return {};
1230+
return emitX86vpcom(builder, getLoc(expr->getExprLoc()), ops, false);
11741231
case X86::BI__builtin_ia32_kortestcqi:
11751232
case X86::BI__builtin_ia32_kortestchi:
11761233
case X86::BI__builtin_ia32_kortestcsi:

clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,301 @@ __m128i test_mm_roti_epi64(__m128i a) {
9090
// OGCG: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x i64> %[[VAR]], <2 x i64> splat (i64 100))
9191
return _mm_roti_epi64(a, 100);
9292
}
93+
94+
// Unsigned byte compare, predicate 0 (lt): checks that the CIR, LLVM, and
// classic-codegen (OGCG) lowerings all emit an unsigned less-than compare.
__m128i test_mm_com_epu8(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epu8
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !u8i>, !cir.vector<16 x !s8i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu8
  // LLVM: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu8
  // OGCG: %[[CMP:.*]] = icmp ult <16 x i8> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
  return _mm_com_epu8(a, b, 0);
}
110+
111+
// Unsigned word compare, predicate 0 (lt): the operands are bitcast from the
// signed element type to unsigned before the CIR vector compare.
__m128i test_mm_com_epu16(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epu16
  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !u16i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<8 x !u16i>, !cir.vector<8 x !s16i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu16
  // LLVM: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu16
  // OGCG: %[[CMP:.*]] = icmp ult <8 x i16> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
  return _mm_com_epu16(a, b, 0);
}
129+
130+
// Unsigned dword compare, predicate 0 (lt): operands are bitcast to the
// unsigned element type before the compare.
__m128i test_mm_com_epu32(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epu32
  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s32i> -> !cir.vector<4 x !u32i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<4 x !u32i>, !cir.vector<4 x !s32i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu32
  // LLVM: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu32
  // OGCG: %[[CMP:.*]] = icmp ult <4 x i32> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
  return _mm_com_epu32(a, b, 0);
}
148+
149+
// Unsigned qword compare: exercises all six comparison predicates (0=lt,
// 1=le, 2=gt, 3=ge, 4=eq, 5=ne) for the unsigned vpcomuq lowering.
__m128i test_mm_com_epu64(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epu64
  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu64
  // LLVM: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epu64
  // OGCG: %[[CMP:.*]] = icmp ult <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  a = _mm_com_epu64(a, b, 0);

  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(le, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP:.*]] = icmp ule <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG: %[[CMP:.*]] = icmp ule <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  a = _mm_com_epu64(a, b, 1);

  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(gt, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP:.*]] = icmp ugt <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG: %[[CMP:.*]] = icmp ugt <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  a = _mm_com_epu64(a, b, 2);

  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(ge, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP:.*]] = icmp uge <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG: %[[CMP:.*]] = icmp uge <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  a = _mm_com_epu64(a, b, 3);

  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(eq, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG: %[[CMP:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  a = _mm_com_epu64(a, b, 4);

  // CIR: %[[VAL1:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[VAL2:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<2 x !u64i>
  // CIR: %[[CMP:.*]] = cir.vec.cmp(ne, %[[VAL1]], %[[VAL2]]) : !cir.vector<2 x !u64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG: %[[CMP:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  return _mm_com_epu64(a, b, 5);
}
219+
220+
// Signed byte compare, predicate 0 (lt): no signedness bitcast is needed, the
// compare is emitted directly on the signed element type.
__m128i test_mm_com_epi8(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epi8
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<16 x !s8i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi8
  // LLVM: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // LLVM: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi8
  // OGCG: %[[CMP:.*]] = icmp slt <16 x i8> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <16 x i1> %[[CMP]] to <16 x i8>
  // OGCG: %{{.*}} = bitcast <16 x i8> %[[RES]] to <2 x i64>
  return _mm_com_epi8(a, b, 0);
}
236+
237+
// Signed word compare, predicate 0 (lt).
__m128i test_mm_com_epi16(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epi16
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi16
  // LLVM: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // LLVM: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi16
  // OGCG: %[[CMP:.*]] = icmp slt <8 x i16> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <8 x i1> %[[CMP]] to <8 x i16>
  // OGCG: %{{.*}} = bitcast <8 x i16> %[[RES]] to <2 x i64>
  return _mm_com_epi16(a, b, 0);
}
253+
254+
// Signed dword compare, predicate 0 (lt).
__m128i test_mm_com_epi32(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epi32
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
  // CIR: %[[RES:.*]] = cir.cast bitcast %[[CMP]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi32
  // LLVM: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // LLVM: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi32
  // OGCG: %[[CMP:.*]] = icmp slt <4 x i32> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <4 x i1> %[[CMP]] to <4 x i32>
  // OGCG: %{{.*}} = bitcast <4 x i32> %[[RES]] to <2 x i64>
  return _mm_com_epi32(a, b, 0);
}
270+
271+
// Signed qword compare: exercises all six comparison predicates (0=lt, 1=le,
// 2=gt, 3=ge, 4=eq, 5=ne) for the signed vpcomq lowering.
__m128i test_mm_com_epi64(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epi64
  // CIR: %[[CMP:.*]] = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi64
  // LLVM: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>

  // OGCG-LABEL: test_mm_com_epi64
  // OGCG: %[[CMP:.*]] = icmp slt <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES:.*]] = sext <2 x i1> %[[CMP]] to <2 x i64>
  a = _mm_com_epi64(a, b, 0);

  // CIR: %[[CMP1:.*]] = cir.vec.cmp(le, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP1:.*]] = icmp sle <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>

  // OGCG: %[[CMP1:.*]] = icmp sle <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
  a = _mm_com_epi64(a, b, 1);

  // CIR: %[[CMP1:.*]] = cir.vec.cmp(gt, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP1:.*]] = icmp sgt <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>

  // OGCG: %[[CMP1:.*]] = icmp sgt <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
  a = _mm_com_epi64(a, b, 2);

  // CIR: %[[CMP1:.*]] = cir.vec.cmp(ge, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP1:.*]] = icmp sge <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>

  // OGCG: %[[CMP1:.*]] = icmp sge <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
  a = _mm_com_epi64(a, b, 3);

  // CIR: %[[CMP1:.*]] = cir.vec.cmp(eq, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP1:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>

  // OGCG: %[[CMP1:.*]] = icmp eq <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
  a = _mm_com_epi64(a, b, 4);

  // CIR: %[[CMP1:.*]] = cir.vec.cmp(ne, %{{.*}}, %{{.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>

  // LLVM: %[[CMP1:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
  // LLVM: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>

  // OGCG: %[[CMP1:.*]] = icmp ne <2 x i64> %{{.*}}, %{{.*}}
  // OGCG: %[[RES1:.*]] = sext <2 x i1> %[[CMP1]] to <2 x i64>
  return _mm_com_epi64(a, b, 5);
}
329+
330+
// Predicate 6 (FALSE): the builtin folds to an all-zeros vector with no
// compare emitted.
__m128i test_mm_com_epi32_false(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epi32_false
  // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi32_false
  // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
  // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
  // LLVM: ret <2 x i64> %[[ZERO]]

  // OGCG-LABEL: test_mm_com_epi32_false
  // OGCG: ret <2 x i64> zeroinitializer
  return _mm_com_epi32(a, b, 6);
}
344+
345+
// Predicate 6 (FALSE), unsigned variant: also folds to an all-zeros vector.
__m128i test_mm_com_epu32_false(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epu32_false
  // CIR: %[[ZERO:.*]] = cir.const #cir.zero : !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[ZERO]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu32_false
  // LLVM: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
  // LLVM: %[[ZERO:.*]] = load <2 x i64>, ptr %[[A]], align 16
  // LLVM: ret <2 x i64> %[[ZERO]]

  // OGCG-LABEL: test_mm_com_epu32_false
  // OGCG: ret <2 x i64> zeroinitializer
  return _mm_com_epu32(a, b, 6);
}
359+
360+
// Predicate 7 (TRUE): the builtin folds to an all-ones vector (splat of -1)
// with no compare emitted.
__m128i test_mm_com_epi32_true(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epi32_true
  // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
  // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epi32_true
  // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
  // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
  // LLVM: ret <2 x i64> %[[SPLAT]]

  // OGCG-LABEL: test_mm_com_epi32_true
  // OGCG: ret <2 x i64> splat (i64 -1)
  return _mm_com_epi32(a, b, 7);
}
375+
376+
// Predicate 7 (TRUE), unsigned variant: also folds to an all-ones vector.
__m128i test_mm_com_epu32_true(__m128i a, __m128i b) {
  // CIR-LABEL: test_mm_com_epu32_true
  // CIR: %[[VAL:.*]] = cir.const #cir.int<-1> : !s32i
  // CIR: %[[SPLAT:.*]] = cir.vec.splat %[[VAL]] : !s32i, !cir.vector<4 x !s32i>
  // CIR: %{{.*}} = cir.cast bitcast %[[SPLAT]] : !cir.vector<4 x !s32i> -> !cir.vector<2 x !s64i>

  // LLVM-LABEL: test_mm_com_epu32_true
  // LLVM: store <2 x i64> splat (i64 -1), ptr %[[VAL:.*]], align 16
  // LLVM: %[[SPLAT:.*]] = load <2 x i64>, ptr %[[VAL]], align 16
  // LLVM: ret <2 x i64> %[[SPLAT]]

  // OGCG-LABEL: test_mm_com_epu32_true
  // OGCG: ret <2 x i64> splat (i64 -1)
  return _mm_com_epu32(a, b, 7);
}

0 commit comments

Comments
 (0)