diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a0181b7ae8f9d..9f361b7613a3d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -280,7 +280,7 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">; def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; - def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; + } let Features = "sse2", @@ -297,6 +297,8 @@ let Features = "sse2", def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; + + def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; } let Features = "sse3", Attributes = [NoThrow] in { @@ -308,7 +310,7 @@ let Features = "sse3", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def lddqu : X86Builtin<"_Vector<16, char>(char const *)">; } -let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">; } @@ -574,7 +576,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; - def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; @@ -610,6 +611,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">; @@ -3232,7 +3234,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in { def kmovq : X86Builtin<"unsigned long long int(unsigned long long int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 68ebfdf27ba43..b70252e522974 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2975,6 +2975,75 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_x86_psrldq_byteshift(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 2); + + APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); + uint64_t Shift = ImmAPS.getZExtValue(); + + const Pointer &Concat = S.Stk.pop(); + if (!Concat.getFieldDesc()->isPrimitiveArray()) + return false; + + unsigned NumElems = Concat.getNumElems(); + const Pointer &Dst = S.Stk.peek(); + PrimType ElemT = Concat.getFieldDesc()->getPrimType(); + + TYPE_SWITCH(ElemT, { + for (unsigned I = 0; I != NumElems; ++I) { + if (I + Shift < NumElems) + Dst.elem(I) = Concat.elem(I + Shift); + else + Dst.elem(I) = T(); + } + }); + + Dst.initializeAllElements(); + + return true; +} + +static bool interp__builtin_x86_palignr(InterpState &S, CodePtr OpPC, + const CallExpr *Call, unsigned ID) { + assert(Call->getNumArgs() == 3); + + APSInt ImmAPS = popToAPSInt(S, Call->getArg(2)); + uint64_t Shift = ImmAPS.getZExtValue(); + + const Pointer &VecB = S.Stk.pop(); + if (!VecB.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &VecA = S.Stk.pop(); + if (!VecA.getFieldDesc()->isPrimitiveArray()) + return false; + + const Pointer &Dst = S.Stk.peek(); + PrimType ElemT = VecA.getFieldDesc()->getPrimType(); + + unsigned LenA = VecA.getNumElems(); + unsigned LenB = VecB.getNumElems(); + + assert(LenA == LenB && (LenA % 16 == 0)); + + TYPE_SWITCH(ElemT, { + for (unsigned I = 0; I != LenA; ++I) { + if (I + Shift < LenA) { + Dst.elem(I) = VecB.elem(I + Shift); + } else if (I + Shift < LenA + LenB) { + Dst.elem(I) = VecA.elem(I + Shift - LenA); + } else { + Dst.elem(I) = T(); + } + } + }); + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3821,6 +3890,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_vec_set_v4di: return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); + case X86::BI__builtin_ia32_psrldqi128_byteshift: + return interp__builtin_x86_psrldq_byteshift(S, OpPC, Call, BuiltinID); + + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: + return interp__builtin_x86_palignr(S, OpPC, Call, BuiltinID); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7bf28d988f405..90550b4be429a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12351,6 +12351,68 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(Elems.data(), NumElems), E); } + + case X86::BI__builtin_ia32_psrldqi128_byteshift: { + assert(E->getNumArgs() == 2); + + APValue Concat; + APSInt Imm; + if (!EvaluateAsRValue(Info, E->getArg(0), Concat) || + !EvaluateInteger(E->getArg(1), Imm, Info)) + return false; + + unsigned VecLen = Concat.getVectorLength(); + unsigned Shift = Imm.getZExtValue(); + + SmallVector ResultElements; + for (unsigned I = 0; I < VecLen; ++I) { + if (I + Shift < VecLen) { + ResultElements.push_back(Concat.getVectorElt(I + Shift)); + } else { + APSInt Zero(8, /*isUnsigned=*/true); + Zero = 0; + ResultElements.push_back(APValue(Zero)); + } + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + + case X86::BI__builtin_ia32_palignr128: + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr512: { + assert(E->getNumArgs() == 3); + + APValue VecA, VecB; + APSInt Imm; + if (!EvaluateAsRValue(Info, E->getArg(0), VecA) || + !EvaluateAsRValue(Info, E->getArg(1), VecB) || + !EvaluateInteger(E->getArg(2), Imm, Info)) + return false; + + if (!VecA.isVector() || !VecB.isVector()) + return false; + + unsigned LenA = VecA.getVectorLength(); + unsigned LenB = VecB.getVectorLength(); + assert(LenA == LenB && (LenA % 16 == 0)); + + unsigned Shift = Imm.getZExtValue(); + SmallVector ResultElements; + for (unsigned I = 0; I < LenA; ++I) { + if (I + Shift < LenA) { + ResultElements.push_back(VecB.getVectorElt(I + Shift)); + } else if (I + Shift < LenA + LenB) { + ResultElements.push_back(VecA.getVectorElt(I + Shift - LenA)); + } else { + APSInt Zero(/*BitWidth=*/8, /*isUnsigned=*/true); + Zero = 0; + ResultElements.push_back(APValue(Zero)); + } + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } } } diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 4299b18243f21..bb9c8943e7776 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -109,6 +109,8 @@ __m256i test_mm256_alignr_epi8(__m256i a, __m256i b) { // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> return _mm256_alignr_epi8(a, b, 2); } +TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2)); +TEST_CONSTEXPR(match_v32qi(_mm256_alignr_epi8(((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 64), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test2_mm256_alignr_epi8 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index bd19363c8d948..cb574dc1816a4 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -2661,6 +2661,8 @@ __m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){ // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> return _mm512_alignr_epi8(__A, __B, 2); } +TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 1, 2)); +TEST_CONSTEXPR(match_v64qi(_mm512_alignr_epi8(((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 128), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m512i __B){ // CHECK-LABEL: test_mm512_mask_alignr_epi8 @@ -2668,6 +2670,7 @@ __m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m5 // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2); } +TEST_CONSTEXPR(match_v64qi(_mm512_mask_alignr_epi8(((__m512i)(__v64qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127)); __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){ // CHECK-LABEL: test_mm512_maskz_alignr_epi8 @@ -2675,6 +2678,7 @@ __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){ // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_alignr_epi8(__U, __A, __B, 2); } +TEST_CONSTEXPR(match_v64qi(_mm512_maskz_alignr_epi8((__mmask64)0x000000000000000f, ((__m512i)(__v64qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), ((__m512i)(__v64qs){65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 127}), 2), 67, 68, 69, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 1fe1ec08ede88..d08db4829e0f3 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3313,6 +3313,7 @@ __m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128 // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2); } +TEST_CONSTEXPR(match_v16qi(_mm_mask_alignr_epi8(((__m128i)(__v16qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127)); __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_alignr_epi8 @@ -3320,6 +3321,8 @@ __m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_alignr_epi8(__U, __A, __B, 2); } +TEST_CONSTEXPR(match_v16qi( _mm_maskz_alignr_epi8((__mmask16)0x000f, ((__m128i)(__v16qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qs){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}),2), 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_alignr_epi8 @@ -3327,6 +3330,8 @@ __m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2); } +TEST_CONSTEXPR(match_v32qi(_mm256_mask_alignr_epi8(((__m256i)(__v32qs){127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127}), (__mmask32)0x0000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127)); + __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_alignr_epi8 @@ -3334,6 +3339,7 @@ __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_alignr_epi8(__U, __A, __B, 2); } +TEST_CONSTEXPR(match_v32qi(_mm256_maskz_alignr_epi8((__mmask32)0x0000000f, ((__m256i)(__v32qs){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), ((__m256i)(__v32qs){33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64}), 2), 35, 36, 37, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dbsad_epu8 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index a4494b69219da..9a8c1d0aa19bd 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -102,6 +102,8 @@ __m64 test_mm_alignr_pi8(__m64 a, __m64 b) { // CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> zeroinitializer, <16 x i32> return _mm_alignr_pi8(a, b, 2); } +TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 1), 10, 11, 12, 13, 14, 15, 16, 1)); +TEST_CONSTEXPR(match_v8qi(_mm_alignr_pi8(((__m64)(__v8qs){1, 2, 3, 4, 5, 6, 7, 8}), ((__m64)(__v8qs){9, 10, 11, 12, 13, 14, 15, 16}), 16), 0, 0, 0, 0, 0, 0, 0, 0)); __m64 test_mm_and_si64(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_and_si64 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index 56ff73f08ab32..b4207d1ba4439 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -48,6 +48,8 @@ __m128i test_mm_alignr_epi8(__m128i a, __m128i b) { // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> return _mm_alignr_epi8(a, b, 2); } +TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 2), 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2)); +TEST_CONSTEXPR(match_v16qi(_mm_alignr_epi8(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), ((__m128i)(__v16qi){17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 32), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m128i test2_mm_alignr_epi8(__m128i a, __m128i b) { // CHECK-LABEL: test2_mm_alignr_epi8