Skip to content

Commit 28819c6

Browse files
committed
[Headers][X86] Improve PALIGNR helper: unify align/shuffle logic and add zero-fill support
1 parent 269f264 commit 28819c6

File tree

8 files changed

+113
-4
lines changed

8 files changed

+113
-4
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ let Features = "sse3", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
315315
def lddqu : X86Builtin<"_Vector<16, char>(char const *)">;
316316
}
317317

318-
let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
318+
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
319319
def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">;
320320
}
321321

@@ -609,8 +609,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
609609

610610
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
611611
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
612-
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, "
613-
"_Vector<32, char>, _Constant int)">;
612+
614613
def psadbw256
615614
: X86Builtin<
616615
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
@@ -634,6 +633,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
634633
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
635634
def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
636635
def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
636+
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
637637

638638
def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
639639
def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
@@ -3294,7 +3294,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in {
32943294
def kmovq : X86Builtin<"unsigned long long int(unsigned long long int)">;
32953295
}
32963296

3297-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3297+
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
32983298
def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">;
32993299
}
33003300

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3447,6 +3447,45 @@ static bool interp__builtin_ia32_shuffle_generic(
34473447
return true;
34483448
}
34493449

3450+
3451+
/// Evaluates a three-argument PALIGNR-style builtin (two vectors plus an
/// immediate) in the bytecode interpreter.
///
/// \p GetSourceIndex is called once per destination element with
/// (destination index, shift, element count) and returns
/// {source vector, source element}. Source vector 0 reads from the operand
/// popped second (OpA below); any other value reads from the operand popped
/// first (OpB). A negative source element asks for a zero element instead.
///
/// Always returns true after marking every destination element initialized.
static bool interp__builtin_x86_palignr(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned, unsigned)>
        GetSourceIndex) {
  assert(Call->getNumArgs() == 3);

  // The immediate is popped before the vector operands; only its low 8 bits
  // are kept.
  const unsigned ByteShift =
      popToAPSInt(S, Call->getArg(2)).getZExtValue() & 0xff;

  // Element type and count are taken from the first call argument.
  const auto *VecTy = Call->getArg(0)->getType()->castAs<VectorType>();
  const QualType ElemQT = VecTy->getElementType();
  const PrimType ElemPT = *S.getContext().classify(ElemQT);
  const unsigned Width = VecTy->getNumElements();

  // Both operands are popped; the destination is only peeked and therefore
  // stays on the stack.
  const Pointer &OpB = S.Stk.pop<Pointer>();
  const Pointer &OpA = S.Stk.pop<Pointer>();
  const Pointer &Result = S.Stk.peek<Pointer>();

  for (unsigned I = 0; I < Width; ++I) {
    const std::pair<unsigned, int> Sel = GetSourceIndex(I, ByteShift, Width);
    const int From = Sel.second;

    if (From >= 0) {
      // Plain element copy from the selected operand.
      const Pointer &Src = Sel.first == 0 ? OpA : OpB;
      TYPE_SWITCH(ElemPT, { Result.elem<T>(I) = Src.elem<T>(From); });
      continue;
    }

    // Negative index: write a zero of the element type.
    if (ElemPT == PT_Float) {
      Result.elem<Floating>(I) =
          Floating(S.getASTContext().getFloatTypeSemantics(ElemQT));
    } else {
      INT_TYPE_SWITCH_NO_BOOL(ElemPT, { Result.elem<T>(I) = T::from(0); });
    }
  }
  Result.initializeAllElements();

  return true;
}
3488+
34503489
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34513490
uint32_t BuiltinID) {
34523491
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4636,6 +4675,25 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
46364675
return APInt(8, 0);
46374676
});
46384677

4678+
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
  // PALIGNR / VPALIGNR: within every 128-bit lane, the 16 bytes of the second
  // operand (low half) and the 16 bytes of the first operand (high half) form
  // a 32-byte value that is shifted right by the immediate; the low 16 bytes
  // of that are the lane's result. The 256- and 512-bit forms repeat this
  // per lane and never move bytes across lanes, so the index math below is
  // done relative to the lane that owns the destination byte.
  return interp__builtin_x86_palignr(
      S, OpPC, Call,
      [](unsigned DstIdx, unsigned Shift, unsigned /*NumElems*/) {
        constexpr unsigned LaneSize = 16; // bytes per 128-bit lane

        // Default to a negative element index: zero-fill this destination
        // element (shift reaches past both source lanes).
        unsigned VecIdx = 0;
        int ElemIdx = -1;

        const unsigned LaneBase = (DstIdx / LaneSize) * LaneSize;
        const unsigned ShiftedIdx = (DstIdx % LaneSize) + Shift;
        if (ShiftedIdx < LaneSize) {
          // Still inside this lane of the second operand.
          VecIdx = 1;
          ElemIdx = static_cast<int>(LaneBase + ShiftedIdx);
        } else if (ShiftedIdx < 2 * LaneSize) {
          // Spilled over into the same lane of the first operand.
          ElemIdx = static_cast<int>(LaneBase + ShiftedIdx - LaneSize);
        }
        return std::pair<unsigned, int>{VecIdx, ElemIdx};
      });
4696+
46394697
default:
46404698
S.FFDiag(S.Current->getLocation(OpPC),
46414699
diag::note_invalid_subexpr_in_const_expr)

clang/lib/AST/ExprConstant.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13080,6 +13080,43 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1308013080

1308113081
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1308213082
}
13083+
13084+
13085+
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512: {
  // PALIGNR / VPALIGNR: for every 128-bit lane, concatenate that lane of the
  // first operand (high) with the same lane of the second operand (low),
  // shift the 32-byte value right by the immediate and keep the low 16
  // bytes. Bytes never cross a lane boundary, so the wider forms must be
  // evaluated lane by lane rather than as one long shift.
  assert(E->getNumArgs() == 3);

  APValue VecA, VecB;
  APSInt Imm;
  if (!EvaluateAsRValue(Info, E->getArg(0), VecA) ||
      !EvaluateAsRValue(Info, E->getArg(1), VecB) ||
      !EvaluateInteger(E->getArg(2), Imm, Info))
    return false;

  if (!VecA.isVector() || !VecB.isVector())
    return false;

  constexpr unsigned LaneSize = 16; // bytes per 128-bit lane
  const unsigned NumElems = VecA.getVectorLength();
  assert(NumElems == VecB.getVectorLength() && NumElems % LaneSize == 0);

  // Build the zero filler with the width and signedness of the result's
  // element type so it matches the elements copied from the operands.
  const QualType ElemTy =
      E->getType()->castAs<VectorType>()->getElementType();
  const APSInt Zero(Info.Ctx.getIntWidth(ElemTy),
                    ElemTy->isUnsignedIntegerType());

  // Only the low 8 bits of the immediate are used.
  const unsigned Shift = Imm.getZExtValue() & 0xff;

  SmallVector<APValue> ResultElements;
  ResultElements.reserve(NumElems);
  for (unsigned I = 0; I != NumElems; ++I) {
    const unsigned LaneBase = (I / LaneSize) * LaneSize;
    const unsigned ShiftedIdx = (I % LaneSize) + Shift;
    if (ShiftedIdx < LaneSize) {
      // Still inside this lane of the second operand.
      ResultElements.push_back(VecB.getVectorElt(LaneBase + ShiftedIdx));
    } else if (ShiftedIdx < 2 * LaneSize) {
      // Spilled over into the same lane of the first operand.
      ResultElements.push_back(
          VecA.getVectorElt(LaneBase + ShiftedIdx - LaneSize));
    } else {
      // Shifted past both source lanes.
      ResultElements.push_back(APValue(Zero));
    }
  }

  return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
1308313120
}
1308413121
}
1308513122

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,14 @@ __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
109109
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
110110
return _mm256_alignr_epi8(a, b, 2);
111111
}
112+
// VPALIGNR shifts each 128-bit lane independently: every lane takes 14 bytes from b's lane followed by the first 2 bytes of a's same lane (the per-lane shuffle mask expected by the codegen test above).
TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 1, 2, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 17, 18));
112113

113114
__m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
114115
// CHECK-LABEL: test2_mm256_alignr_epi8
115116
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
116117
return _mm256_alignr_epi8(a, b, 17);
117118
}
119+
TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 64), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
118120

119121
__m256i test_mm256_and_si256(__m256i a, __m256i b) {
120122
// CHECK-LABEL: test_mm256_and_si256

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2692,20 +2692,24 @@ __m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){
26922692
// CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113>
26932693
return _mm512_alignr_epi8(__A, __B, 2);
26942694
}
2695+
TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2));
2696+
TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 64), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
26952697

26962698
__m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m512i __B){
26972699
// CHECK-LABEL: test_mm512_mask_alignr_epi8
26982700
// CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113>
26992701
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
27002702
return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2);
27012703
}
2704+
TEST_CONSTEXPR(match_v64qi(_mm512_mask_alignr_epi8(((__m512i)(__v64qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127));
27022705

27032706
__m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){
27042707
// CHECK-LABEL: test_mm512_maskz_alignr_epi8
27052708
// CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113>
27062709
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
27072710
return _mm512_maskz_alignr_epi8(__U, __A, __B, 2);
27082711
}
2712+
TEST_CONSTEXPR(match_v64qi(_mm512_maskz_alignr_epi8((__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
27092713

27102714

27112715

clang/test/CodeGen/X86/avx512vlbw-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3376,27 +3376,31 @@ __m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128
33763376
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
33773377
return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2);
33783378
}
3379+
TEST_CONSTEXPR(match_v16qi(_mm_mask_alignr_epi8(((__m128i)(__v16qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127));
33793380

33803381
__m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
33813382
// CHECK-LABEL: test_mm_maskz_alignr_epi8
33823383
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
33833384
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
33843385
return _mm_maskz_alignr_epi8(__U, __A, __B, 2);
33853386
}
3387+
TEST_CONSTEXPR(match_v16qi( _mm_maskz_alignr_epi8((__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}),2), 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
33863388

33873389
__m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
33883390
// CHECK-LABEL: test_mm256_mask_alignr_epi8
33893391
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
33903392
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
33913393
return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2);
33923394
}
3395+
TEST_CONSTEXPR(match_v32qi(_mm256_mask_alignr_epi8(((__m256i)(__v32qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask32)0x0000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127));
33933396

33943397
__m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
33953398
// CHECK-LABEL: test_mm256_maskz_alignr_epi8
33963399
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
33973400
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
33983401
return _mm256_maskz_alignr_epi8(__U, __A, __B, 2);
33993402
}
3403+
TEST_CONSTEXPR(match_v32qi(_mm256_maskz_alignr_epi8((__mmask32)0x0000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
34003404

34013405
__m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) {
34023406
// CHECK-LABEL: test_mm_dbsad_epu8

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ __m64 test_mm_alignr_pi8(__m64 a, __m64 b) {
102102
// CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
103103
return _mm_alignr_pi8(a, b, 2);
104104
}
105+
TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 1), 10, 11, 12, 13, 14, 15, 16, 1));
106+
TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 16), 0, 0, 0, 0, 0, 0, 0, 0));
105107

106108
__m64 test_mm_and_si64(__m64 a, __m64 b) {
107109
// CHECK-LABEL: test_mm_and_si64

clang/test/CodeGen/X86/ssse3-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ __m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
4848
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
4949
return _mm_alignr_epi8(a, b, 2);
5050
}
51+
TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2));
52+
TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
5153

5254
__m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
5355
// CHECK-LABEL: test2_mm_alignr_epi8

0 commit comments

Comments
 (0)