Commit 2500d45

[CIR][X86] Implement lowering for AVX512 mask test builtins (kortestc, kortestz, ktestc, ktestz)
This patch adds CIR codegen support for the AVX512 mask test builtins on X86: kortestc and kortestz across all supported mask widths (qi, hi, si, di), plus the ktestc and ktestz forms, which are lowered to calls to the corresponding x86.avx512.ktest{c,z}.{b,w,d,q} intrinsics. Each kortest builtin is lowered to the expected vector<i1> mask OR and scalar comparison form in CIR, consistent with the semantics of the corresponding LLVM implementation. Because ClangIR does not yet provide a dedicated `zext` operation, the lowering emulates zero extension by first converting the boolean result through `bool_to_int` and then performing an integral cast to the final result type. This reproduces the `icmp` + `zext` pattern used in LLVM IR and maintains semantic equivalence.
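
As a sketch (SSA value names are illustrative; the ops are taken from the FileCheck expectations added below), the kortestz lowering for a 32-bit mask emits:

    %or     = cir.binop(or, %lhs, %rhs) : !cir.vector<32 x !cir.int<u, 1>>
    %or_int = cir.cast bitcast %or : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
    %cmp    = cir.cmp(eq, %or_int, %zero) : !u32i, !cir.bool
    %b2i    = cir.cast bool_to_int %cmp : !cir.bool -> !s32i
    %res    = cir.cast integral %b2i : !s32i -> !u8i

which lowers to the usual LLVM IR sequence:

    %or   = or <32 x i1> %lhs, %rhs
    %cast = bitcast <32 x i1> %or to i32
    %cmp  = icmp eq i32 %cast, 0
    %zext = zext i1 %cmp to i32
    %res  = trunc i32 %zext to i8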
1 parent 7c33b82 commit 2500d45

File tree: 4 files changed (+474, -6 lines)


clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 53 additions & 6 deletions

@@ -220,6 +220,18 @@ static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
                               ops[0].getType());
 }
 
+static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
+                                   const std::string &intrinsicName,
+                                   SmallVectorImpl<mlir::Value> &ops) {
+  auto intTy = cast<cir::IntType>(ops[0].getType());
+  unsigned numElts = intTy.getWidth();
+  mlir::Value lhsVec = getMaskVecValue(builder, loc, ops[0], numElts);
+  mlir::Value rhsVec = getMaskVecValue(builder, loc, ops[1], numElts);
+  mlir::Type vecTy = lhsVec.getType();
+  return emitIntrinsicCallOp(builder, loc, intrinsicName, vecTy,
+                             mlir::ValueRange{lhsVec, rhsVec});
+}
+
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                  mlir::Value vec, mlir::Value value,
                                  mlir::Value indexOp) {
@@ -1121,26 +1133,61 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vpcomuw:
   case X86::BI__builtin_ia32_vpcomud:
   case X86::BI__builtin_ia32_vpcomuq:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_kortestcqi:
   case X86::BI__builtin_ia32_kortestchi:
   case X86::BI__builtin_ia32_kortestcsi:
-  case X86::BI__builtin_ia32_kortestcdi:
+  case X86::BI__builtin_ia32_kortestcdi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    cir::ConstantOp allOnesOp = cast<cir::ConstantOp>(
+        builder.getConstAPInt(loc, ty, APInt::getAllOnes(ty.getWidth())));
+    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+    mlir::Value cmp =
+        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allOnesOp);
+    return builder.createCast(cir::CastKind::bool_to_int, cmp,
+                              cgm.convertType(expr->getType()));
+  }
   case X86::BI__builtin_ia32_kortestzqi:
   case X86::BI__builtin_ia32_kortestzhi:
   case X86::BI__builtin_ia32_kortestzsi:
-  case X86::BI__builtin_ia32_kortestzdi:
+  case X86::BI__builtin_ia32_kortestzdi: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    cir::IntType ty = cast<cir::IntType>(ops[0].getType());
+    cir::ConstantOp allZerosOp = builder.getNullValue(ty, loc);
+    mlir::Value orOp = emitX86MaskLogic(builder, loc, cir::BinOpKind::Or, ops);
+    mlir::Value cmp =
+        cir::CmpOp::create(builder, loc, cir::CmpOpKind::eq, orOp, allZerosOp);
+    return builder.createCast(cir::CastKind::bool_to_int, cmp,
+                              cgm.convertType(expr->getType()));
+  }
   case X86::BI__builtin_ia32_ktestcqi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.b", ops);
   case X86::BI__builtin_ia32_ktestzqi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.b", ops);
   case X86::BI__builtin_ia32_ktestchi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.w", ops);
   case X86::BI__builtin_ia32_ktestzhi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.w", ops);
   case X86::BI__builtin_ia32_ktestcsi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.d", ops);
   case X86::BI__builtin_ia32_ktestzsi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.d", ops);
   case X86::BI__builtin_ia32_ktestcdi:
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestc.q", ops);
   case X86::BI__builtin_ia32_ktestzdi:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
+    return emitX86MaskTest(builder, getLoc(expr->getExprLoc()),
+                           "x86.avx512.ktestz.q", ops);
   case X86::BI__builtin_ia32_kaddqi:
     return emitX86MaskAddLogic(builder, getLoc(expr->getExprLoc()),
                                "x86.avx512.kadd.b", ops);

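For reference, a minimal usage sketch of the intrinsics these builtins back (assuming AVX512BW and <immintrin.h>; the helper function names here are hypothetical):

    #include <immintrin.h>

    // Returns 1 iff (a | b) has all 32 bits set, i.e. every lane is
    // selected by at least one of the two masks (kortestc semantics).
    unsigned char all_lanes_covered(__mmask32 a, __mmask32 b) {
      return _kortestc_mask32_u8(a, b);
    }

    // Returns 1 iff (a | b) == 0, i.e. both masks are empty
    // (kortestz semantics).
    unsigned char both_masks_empty(__mmask32 a, __mmask32 b) {
      return _kortestz_mask32_u8(a, b);
    }
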
clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c

Lines changed: 213 additions & 0 deletions

@@ -559,3 +559,216 @@ __m512i test_mm512_shufflehi_epi16(__m512i __A) {
   // OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
   return _mm512_shufflehi_epi16(__A, 5);
 }
+
+unsigned char test_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) {
+  // CIR-LABEL: _kortestc_mask32_u8
+  // CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<4294967295> : !u32i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ALL_ONES]]) : !u32i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestc_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[OR:.*]] = or <32 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // LLVM: %[[CMP:.*]] = icmp eq i32 %[[CAST]], -1
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestc_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[OR:.*]] = or <32 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // OGCG: %[[CMP:.*]] = icmp eq i32 %[[CAST]], -1
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestc_mask32_u8(__A, __B);
+}
+
+unsigned char test_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
+  // CIR-LABEL: _kortestc_mask64_u8
+  // CIR: %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ALL_ONES]]) : !u64i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestc_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[OR:.*]] = or <64 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // LLVM: %[[CMP:.*]] = icmp eq i64 %[[CAST]], -1
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestc_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[OR:.*]] = or <64 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // OGCG: %[[CMP:.*]] = icmp eq i64 %[[CAST]], -1
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestc_mask64_u8(__A, __B);
+}
+
+unsigned char test_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) {
+  // CIR-LABEL: _kortestz_mask32_u8
+  // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ZERO]]) : !u32i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestz_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[OR:.*]] = or <32 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // LLVM: %[[CMP:.*]] = icmp eq i32 %[[CAST]], 0
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestz_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[OR:.*]] = or <32 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <32 x i1> %[[OR]] to i32
+  // OGCG: %[[CMP:.*]] = icmp eq i32 %[[CAST]], 0
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestz_mask32_u8(__A, __B);
+}
+
+unsigned char test_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
+  // CIR-LABEL: _kortestz_mask64_u8
+  // CIR: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR:.*]] = cir.binop(or, %[[LHS]], %[[RHS]]) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[OR_INT:.*]] = cir.cast bitcast %[[OR]] : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+  // CIR: %[[CMP:.*]] = cir.cmp(eq, %[[OR_INT]], %[[ZERO]]) : !u64i, !cir.bool
+  // CIR: %[[B2I:.*]] = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+  // CIR: cir.cast integral %[[B2I]] : !s32i -> !u8i
+
+  // LLVM-LABEL: _kortestz_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[OR:.*]] = or <64 x i1> %[[LHS]], %[[RHS]]
+  // LLVM: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // LLVM: %[[CMP:.*]] = icmp eq i64 %[[CAST]], 0
+  // LLVM: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // LLVM: trunc i32 %[[ZEXT]] to i8
+
+  // OGCG-LABEL: _kortestz_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[OR:.*]] = or <64 x i1> %[[LHS]], %[[RHS]]
+  // OGCG: %[[CAST:.*]] = bitcast <64 x i1> %[[OR]] to i64
+  // OGCG: %[[CMP:.*]] = icmp eq i64 %[[CAST]], 0
+  // OGCG: %[[ZEXT:.*]] = zext i1 %[[CMP]] to i32
+  // OGCG: trunc i32 %[[ZEXT]] to i8
+  return _kortestz_mask64_u8(__A, __B);
+}
+
+
+unsigned char test_ktestc_mask32_u8(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _ktestc_mask32_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestc.d"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<32 x !cir.int<u,1>> -> !u32i
+  // CIR: cir.cast integral %[[CAST]] : !u32i -> !u8i
+
+  // LLVM-LABEL: _ktestc_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %[[LHS]], <32 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestc_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.d
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestc_mask32_u8(A, B);
+}
+
+unsigned char test_ktestz_mask32_u8(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _ktestz_mask32_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestz.d"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<32 x !cir.int<u,1>> -> !u32i
+  // CIR: cir.cast integral %[[CAST]] : !u32i -> !u8i
+
+  // LLVM-LABEL: _ktestz_mask32_u8
+  // LLVM: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %[[LHS]], <32 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestz_mask32_u8
+  // OGCG: %[[LHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.d
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestz_mask32_u8(A, B);
+}
+
+unsigned char test_ktestc_mask64_u8(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _ktestc_mask64_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestc.q"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<64 x !cir.int<u,1>> -> !u64i
+  // CIR: cir.cast integral %[[CAST]] : !u64i -> !u8i
+
+  // LLVM-LABEL: _ktestc_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %[[LHS]], <64 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestc_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestc.q
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestc_mask64_u8(A, B);
+}
+
+unsigned char test_ktestz_mask64_u8(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _ktestz_mask64_u8
+  // CIR: %[[LHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RHS:.*]] = cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.ktestz.q"
+  // CIR: %[[CAST:.*]] = cir.cast bitcast %[[RES]] : !cir.vector<64 x !cir.int<u,1>> -> !u64i
+  // CIR: cir.cast integral %[[CAST]] : !u64i -> !u8i
+
+  // LLVM-LABEL: _ktestz_mask64_u8
+  // LLVM: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %[[LHS]], <64 x i1> %[[RHS]])
+  // LLVM: trunc i32 %[[RES]] to i8
+
+  // OGCG-LABEL: _ktestz_mask64_u8
+  // OGCG: %[[LHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RHS:.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %[[RES:.*]] = call i32 @llvm.x86.avx512.ktestz.q
+  // OGCG: trunc i32 %[[RES]] to i8
+  return _ktestz_mask64_u8(A, B);
+}
