diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index e98bee28c15be..45c7743938d73 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -277,8 +277,6 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">; - def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; - def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; } let Features = "sse2", @@ -295,6 +293,11 @@ let Features = "sse2", def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">; def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">; + + def pslldqi128_byteshift + : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; + def psrldqi128_byteshift + : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">; } let Features = "sse3", Attributes = [NoThrow] in { @@ -591,12 +594,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; - def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">; def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; - def psrldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">; def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">; @@ -655,6 +656,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def psrlv4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; def psllv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; def psrlv2di : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; + def pslldqi256_byteshift + : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">; + def psrldqi256_byteshift + : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">; } let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<256>] in { @@ -1360,6 +1365,10 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">; def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; + def pslldqi512_byteshift + : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Constant int)">; + def psrldqi512_byteshift + : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Constant int)">; } let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { @@ -2058,8 +2067,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; def psrlw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; - def pslldqi512_byteshift : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Constant int)">; - def psrldqi512_byteshift : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Constant int)">; } let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<128>] in { diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index a2e97fcafdfef..5e3c7a0728487 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2878,6 +2878,35 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_byteshift(InterpState &S, CodePtr OpPC, + const CallExpr *Call, uint32_t BuiltinID, + bool isLeft) { + APSInt Amt = popToAPSInt(S, Call->getArg(1)); + unsigned ShiftVal = (unsigned)Amt.getZExtValue() & 0xff; + + const Pointer &VecPtr = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + unsigned NumElts = VecPtr.getNumElems(); + const unsigned LaneBytes = 16; + assert(NumElts % LaneBytes == 0); + + for (unsigned LaneBase = 0; LaneBase < NumElts; LaneBase += LaneBytes) { + for (unsigned I = 0; I < LaneBytes; ++I) { + int Src = isLeft ? (I + ShiftVal) : (int)I - (int)ShiftVal; + if (Src >= 0 && (unsigned)Src < LaneBytes) { + Dst.elem(LaneBase + I) = + VecPtr.elem(LaneBase + (unsigned)Src); + } else { + Dst.elem(LaneBase + I) = 0; + } + } + } + + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3667,6 +3696,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI__builtin_elementwise_fshr: return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshr); + case clang::X86::BI__builtin_ia32_pslldqi128_byteshift: + case clang::X86::BI__builtin_ia32_pslldqi256_byteshift: + case clang::X86::BI__builtin_ia32_pslldqi512_byteshift: + return interp__builtin_byteshift(S, OpPC, Call, BuiltinID, /*IsLeft=*/true); + case clang::X86::BI__builtin_ia32_psrldqi128_byteshift: + case clang::X86::BI__builtin_ia32_psrldqi256_byteshift: + case clang::X86::BI__builtin_ia32_psrldqi512_byteshift: + return interp__builtin_byteshift(S, OpPC, Call, BuiltinID, + /*IsLeft=*/false); case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b706b14945b6d..d6d7c33d3d7b4 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12191,6 +12191,50 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_pslldqi128_byteshift: + case X86::BI__builtin_ia32_psrldqi128_byteshift: + case X86::BI__builtin_ia32_pslldqi256_byteshift: + case X86::BI__builtin_ia32_psrldqi256_byteshift: + case X86::BI__builtin_ia32_pslldqi512_byteshift: + case X86::BI__builtin_ia32_psrldqi512_byteshift: { + APSInt Amt; + if (!EvaluateInteger(E->getArg(1), Amt, Info)) + return false; + unsigned ShiftVal = (unsigned)Amt.getZExtValue() & 0xff; + + APValue Vec; + if (!Evaluate(Vec, Info, E->getArg(0)) || !Vec.isVector()) + return false; + + unsigned NumElts = Vec.getVectorLength(); + const unsigned LaneBytes = 16; + assert(NumElts % LaneBytes == 0); + + SmallVector Result; + Result.resize(NumElts, APValue(0)); + + bool IsLeft = + (E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi128_byteshift || + E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi256_byteshift || + E->getBuiltinCallee() == X86::BI__builtin_ia32_pslldqi512_byteshift); + + if (ShiftVal >= LaneBytes) + return ZeroInitialization(E); + + for (unsigned LaneBase = 0; LaneBase < NumElts; LaneBase += LaneBytes) { + for (unsigned I = 0; I < LaneBytes; ++I) { + int src = IsLeft ? (I + ShiftVal) : (int)I - (int)ShiftVal; + + if (src >= 0 && (unsigned)src < LaneBytes) + Result[LaneBase + I] = Vec.getVectorElt(LaneBase + (unsigned)src); + else + Result[LaneBase + I] = APValue(0); + } + } + + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: case X86::BI__builtin_ia32_insertf64x2_256: diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index eff2797e87c75..d54125dc3ded6 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1196,6 +1196,8 @@ __m256i test_mm256_slli_si256(__m256i a) { // CHECK: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> return _mm256_slli_si256(a, 3); } +TEST_CONSTEXPR(match_v32qi(_mm256_slli_si256((__m256i)(__v32qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116}, 5), 0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11, 0,0,0,0,0,101,102,103,104,105,106,107,108,109,110,111)) +TEST_CONSTEXPR(match_v32qi(_mm256_slli_si256((__m256i)(__v32qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) __m128i test_mm_sllv_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_sllv_epi32 @@ -1339,6 +1341,8 @@ __m256i test_mm256_srli_si256(__m256i a) { // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> return _mm256_srli_si256(a, 3); } +TEST_CONSTEXPR(match_v32qi(_mm256_srli_si256((__m256i)(__v32qi){ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116}, 5), 6,7,8,9,10,11,12,13,14,15,16,0,0,0,0,0, 106,107,108,109,110,111,112,113,114,115,116,0,0,0,0,0)) +TEST_CONSTEXPR(match_v32qi(_mm256_srli_si256((__m256i)(__v32qi){ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) __m128i test_mm_srlv_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_srlv_epi32 diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 3f42ac0268978..949671c900ec4 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -2721,3 +2721,19 @@ void test_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.512 _mm512_mask_cvtusepi16_storeu_epi8 ( __P, __M, __A); } + +__m512i test_mm512_bslli_epi16(__m512i a) { + // CHECK-LABEL: @test_bslli + // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> zeroinitializer, <64 x i32> + return _mm512_bslli_epi128(a, 4); +} +TEST_CONSTEXPR(match_v64qi(_mm512_bslli_epi128((__m512i)(__v64qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64}, 4), 0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12, 0,0,0,0,17,18,19,20,21,22,23,24,25,26,27,28, 0,0,0,0,33,34,35,36,37,38,39,40,41,42,43,44, 0,0,0,0,49,50,51,52,53,54,55,56,57,58,59,60)); +TEST_CONSTEXPR(match_v64qi(_mm512_bslli_epi128((__m512i)(__v64qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48, 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); + +__m512i test_mm512_bsrli_epi16(__m512i a) { + // CHECK-LABEL: @test_bsrli + // CHECK: shufflevector <64 x i8> %{{.*}}, <64 x i8> zeroinitializer, <64 x i32> + return _mm512_bsrli_epi128(a, 4); +} +TEST_CONSTEXPR(match_v64qi(_mm512_bsrli_epi128((__m512i)(__v64qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48, 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64}, 4), 5,6,7,8,9,10,11,12,13,14,15,16,0,0,0,0, 21,22,23,24,25,26,27,28,29,30,31,32,0,0,0,0, 37,38,39,40,41,42,43,44,45,46,47,48,0,0,0,0, 53,54,55,56,57,58,59,60,61,62,63,64,0,0,0,0)); +TEST_CONSTEXPR(match_v64qi(_mm512_bsrli_epi128((__m512i)(__v64qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32, 33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48, 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 84b90c09444c2..1a865701e536d 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1562,6 +1562,8 @@ __m128i test_mm_srli_si128(__m128i A) { // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> return _mm_srli_si128(A, 5); } +TEST_CONSTEXPR(match_v16qi(_mm_slli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 3), 0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,13)) +TEST_CONSTEXPR(match_v16qi(_mm_slli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) __m128i test_mm_srli_si128_2(__m128i A) { // CHECK-LABEL: test_mm_srli_si128_2 @@ -1569,6 +1571,9 @@ __m128i test_mm_srli_si128_2(__m128i A) { return _mm_srli_si128(A, 17); } +TEST_CONSTEXPR(match_v16qi(_mm_srli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 3), 4,5,6,7,8,9,10,11,12,13,14,15,16,0,0,0)) +TEST_CONSTEXPR(match_v16qi(_mm_srli_si128((__m128i)(__v16qi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}, 16), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)) + void test_mm_store_pd(double* A, __m128d B) { // CHECK-LABEL: test_mm_store_pd // CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16