diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 77e599587edc3..7d9845fbf4cf5 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -199,11 +199,15 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
   def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
   def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
   def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
-  def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
   def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
   def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
 }
 
+let Features = "sse",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+  def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
+}
+
 let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
   def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
 }
@@ -221,8 +225,7 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
   def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
   def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
   def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
   def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
-  def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
   def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
   def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
   def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
@@ -295,6 +298,8 @@ let Features = "sse2",
 
   def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
   def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+
+  def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
 }
 
 let Features = "sse3", Attributes = [NoThrow] in {
@@ -500,13 +505,13 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
   def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
   def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
   def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
+  def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
+  def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
 }
 
 let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
   def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
   def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
-  def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
-  def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
   def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
   def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
   def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
@@ -946,7 +951,8 @@ let Features = "pku", Attributes = [NoThrow] in {
   def wrpkru : X86Builtin<"void(unsigned int)">;
 }
 
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512f",
+    Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
   def sqrtpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
   def sqrtps512 : X86Builtin<"_Vector<16,
float>(_Vector<16, float>, _Constant int)">;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 9076946d29657..63da40f621bd6 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SipHash.h"
+#include <cmath>
 
 namespace clang {
 namespace interp {
@@ -2994,6 +2995,85 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+/// Computes one element of the x86 packed square-root builtins for constant
+/// evaluation. \p Semantics is the floating-point format of the result.
+static llvm::APFloat apply_x86_sqrt(llvm::APFloat Val,
+                                    const llvm::fltSemantics &Semantics) {
+  // A NaN operand propagates to the result; a signaling NaN is quieted.
+  if (Val.isNaN())
+    return Val.makeQuiet();
+
+  // The root of a negative non-zero operand is the x86 "real indefinite"
+  // QNaN, whose sign bit is set. -0.0 is not an error and falls through.
+  if (Val.isNegative() && !Val.isZero())
+    return llvm::APFloat::getQNaN(Semantics, /*Negative=*/true);
+
+  // Take the root in host double precision, then round to the result format.
+  llvm::APFloat Root(std::sqrt(Val.convertToDouble()));
+  bool LosesInfo;
+  Root.convert(Semantics, llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
+  return Root;
+}
+
+/// Constant-evaluates the x86 packed square-root builtins. The 512-bit forms
+/// take a second, rounding-control argument; evaluation fails unless it is 4
+/// (_MM_FROUND_CUR_DIRECTION in the intrinsic headers).
+static bool interp__builtin_x86_sqrt(InterpState &S, CodePtr OpPC,
+                                     const CallExpr *Call, unsigned ID) {
+  unsigned NumArgs = Call->getNumArgs();
+  assert(NumArgs == 1 || NumArgs == 2);
+  QualType ArgTy = Call->getArg(0)->getType();
+
+  // The operand is either a floating-point scalar or a vector of them.
+  QualType EltTy = ArgTy;
+  if (ArgTy->isVectorType())
+    EltTy = ArgTy->castAs<VectorType>()->getElementType();
+  if (!EltTy->isRealFloatingType())
+    return false;
+  const llvm::fltSemantics &Semantics =
+      S.getContext().getFloatSemantics(EltTy);
+
+  if (NumArgs == 2) {
+    if (!Call->getArg(1)->getType()->isIntegerType())
+      return false;
+    // The rounding control is popped first, ahead of the value operand.
+    APSInt Rounding = popToAPSInt(S, Call->getArg(1));
+    if (Rounding.getZExtValue() != 4)
+      return false;
+  }
+
+  // Scalar case
+  if (!ArgTy->isVectorType()) {
+    llvm::APFloat Val = S.Stk.pop<Floating>().getAPFloat();
+    Val = apply_x86_sqrt(Val, Semantics);
+    S.Stk.push<Floating>(Val);
+    return true;
+  }
+
+  // Vector case: read each element of the operand and write its root into
+  // the destination vector that sits below it on the stack.
+  const auto *VT = ArgTy->castAs<VectorType>();
+
+  const Pointer &Arg = S.Stk.pop<Pointer>();
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  assert(Arg.getFieldDesc()->isPrimitiveArray());
+  assert(Dst.getFieldDesc()->isPrimitiveArray());
+  assert(Arg.getFieldDesc()->getNumElems() ==
+         Dst.getFieldDesc()->getNumElems());
+
+  unsigned NumElems = VT->getNumElements();
+
+  for (unsigned I = 0; I != NumElems; ++I) {
+    llvm::APFloat Val = Arg.elem<Floating>(I).getAPFloat();
+    Val = apply_x86_sqrt(Val, Semantics);
+    Dst.elem<Floating>(I) = Val;
+  }
+
+  Dst.initializeAllElements();
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3753,6 +3833,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_vinsertf128_si256:
   case X86::BI__builtin_ia32_insert128i256:
     return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
+  case X86::BI__builtin_ia32_sqrtpd:
+  case X86::BI__builtin_ia32_sqrtps:
+  case X86::BI__builtin_ia32_sqrtpd256:
+  case X86::BI__builtin_ia32_sqrtps256:
+  case X86::BI__builtin_ia32_sqrtps512:
+  case X86::BI__builtin_ia32_sqrtpd512:
+    return interp__builtin_x86_sqrt(S, OpPC, Call, BuiltinID);
 
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..242ccfeb5b4e2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Support/SipHash.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cmath>
 #include <cstring>
 #include <functional>
 #include <limits>
@@ -12235,6 +12236,58 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(ResultElements.data(), ResultElements.size()), E);
   }
+  case X86::BI__builtin_ia32_sqrtpd:
+  case X86::BI__builtin_ia32_sqrtps:
+  case X86::BI__builtin_ia32_sqrtpd256:
+  case X86::BI__builtin_ia32_sqrtps256:
+  case X86::BI__builtin_ia32_sqrtps512:
+  case X86::BI__builtin_ia32_sqrtpd512: {
+    APValue Source;
+    if (!EvaluateAsRValue(Info, E->getArg(0), Source))
+      return false;
+
+    // The 512-bit forms take a rounding-control immediate. Only the value 4
+    // (_MM_FROUND_CUR_DIRECTION in the intrinsic headers) is folded, matching
+    // the check in the bytecode interpreter.
+    if (E->getNumArgs() == 2) {
+      APSInt Rounding;
+      if (!EvaluateInteger(E->getArg(1), Rounding, Info))
+        return false;
+      if (Rounding.getZExtValue() != 4)
+        return false;
+    }
+
+    QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+    if (!DestEltTy->isFloatingType())
+      return false;
+    const llvm::fltSemantics &Semantics =
+        Info.Ctx.getFloatTypeSemantics(DestEltTy);
+
+    unsigned SourceLen = Source.getVectorLength();
+    SmallVector<APValue> ResultElements;
+    ResultElements.reserve(SourceLen);
+
+    for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+      llvm::APFloat Value = Source.getVectorElt(EltNum).getFloat();
+      if (Value.isNaN()) {
+        // A NaN operand propagates; a signaling NaN is quieted.
+        Value = Value.makeQuiet();
+      } else if (Value.isNegative() && !Value.isZero()) {
+        // x86 yields the "real indefinite" QNaN, whose sign bit is set.
+        Value = llvm::APFloat::getQNaN(Semantics, /*Negative=*/true);
+      } else {
+        // Take the root in host double precision, then round to the
+        // element format.
+        llvm::APFloat Root(std::sqrt(Value.convertToDouble()));
+        bool LosesInfo;
+        Root.convert(Semantics, llvm::RoundingMode::NearestTiesToEven,
+                     &LosesInfo);
+        Value = Root;
+      }
+      ResultElements.push_back(APValue(Value));
+    }
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 80e58425cdd71..3055911b970d8 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1458,24 +1458,21 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd()))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_sqrt_pd(__m512d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_sqrt_pd(__m512d __A) {
   return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
                                            _MM_FROUND_CUR_DIRECTION);
 }
 
-static
__inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, (__v8df)_mm512_sqrt_pd(__A), (__v8df)_mm512_setzero_pd()); @@ -1494,24 +1491,21 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps())) -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_sqrt_ps(__m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, (__v16sf)_mm512_sqrt_ps(__A), (__v16sf)_mm512_setzero_ps()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 754f43ad88543..99351610cb1fc 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3444,61 +3444,53 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 
(__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) - static __inline__ __m128d __DEFAULT_FN_ATTRS128 - _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)__W); - } +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W); +} - static __inline__ __m128d __DEFAULT_FN_ATTRS128 - _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { - return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, - (__v2df)_mm_sqrt_pd(__A), - (__v2df)_mm_setzero_pd()); - } +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { + return (__m128d)__builtin_ia32_selectpd_128( + (__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd()); +} - static __inline__ __m256d __DEFAULT_FN_ATTRS256 - _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)__W); - } +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W); +} - static __inline__ __m256d __DEFAULT_FN_ATTRS256 - _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { - return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, - (__v4df)_mm256_sqrt_pd(__A), - (__v4df)_mm256_setzero_pd()); - } +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { + return (__m256d)__builtin_ia32_selectpd_256( + (__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd()); +} - static __inline__ __m128 __DEFAULT_FN_ATTRS128 - _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { - return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)__W); - } +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W); +} - static __inline__ __m128 __DEFAULT_FN_ATTRS128 - _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { - return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, - (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)_mm_setzero_ps()); - } +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_ps()); +} - static __inline__ __m256 __DEFAULT_FN_ATTRS256 - _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)__W); - } +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W); +} - static __inline__ __m256 __DEFAULT_FN_ATTRS256 - _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { - return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, - (__v8sf)_mm256_sqrt_ps(__A), - (__v8sf)_mm256_setzero_ps()); - } +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)_mm256_setzero_ps()); +} static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a7f70994be9db..2ef8d5085869f 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -335,9 +335,8 
@@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a, /// A 256-bit vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the square roots of the /// values in the operand. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_sqrt_pd(__m256d __a) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_sqrt_pd(__m256d __a) { return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); } @@ -352,9 +351,7 @@ _mm256_sqrt_pd(__m256d __a) /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the square roots of the /// values in the operand. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_sqrt_ps(__m256 __a) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sqrt_ps(__m256 __a) { return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 6597e7e7d4030..1943624f3424e 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -256,7 +256,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, /// A 128-bit vector of [2 x double]. /// \returns A 128-bit vector of [2 x double] containing the square roots of the /// values in the operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sqrt_pd(__m128d __a) { return __builtin_ia32_sqrtpd((__v2df)__a); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d876b4735a7d2..7c68c1395df5c 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -248,9 +248,7 @@ _mm_sqrt_ss(__m128 __a) /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [4 x float] containing the square roots of the /// values in the operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sqrt_ps(__m128 __a) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sqrt_ps(__m128 __a) { return __builtin_ia32_sqrtps((__v4sf)__a); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 347cd9ee6a667..28132883d4125 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1871,12 +1871,16 @@ __m256d test_mm256_sqrt_pd(__m256d A) { return _mm256_sqrt_pd(A); } +TEST_CONSTEXPR(match_m256d(_mm256_sqrt_pd(_mm256_set_pd(16.0, 9.0, 4.0, 1.0)), 1.0, 2.0, 3.0, 4.0)); + __m256 test_mm256_sqrt_ps(__m256 A) { // CHECK-LABEL: test_mm256_sqrt_ps // CHECK: call {{.*}}<8 x float> @llvm.sqrt.v8f32(<8 x float> %{{.*}}) return _mm256_sqrt_ps(A); } +TEST_CONSTEXPR(match_m256(_mm256_sqrt_ps(_mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)); + void test_mm256_store_pd(double* A, __m256d B) { // CHECK-LABEL: test_mm256_store_pd // CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 84eaad8d99e61..3fa6872633c87 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -18,6 +18,8 @@ __m512d test_mm512_sqrt_pd(__m512d a) return _mm512_sqrt_pd(a); } +TEST_CONSTEXPR(match_m512d(_mm512_sqrt_pd(_mm512_set_pd(16.0, 9.0, 4.0, 1.0, 16.0, 9.0, 4.0, 1.0)), 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0)); + __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_sqrt_pd @@ -27,6 +29,12 @@ __m512d test_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) return _mm512_mask_sqrt_pd (__W,__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_sqrt_pd( + _mm512_set_pd(800.0, 700.0, 600.0, 500.0, 400.0, 300.0, 200.0, 100.0), + 0b11000011, + _mm512_set_pd(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 
4.0, 1.0)), + 1.0, 2.0, 300.0, 400.0, 500.0, 600.0, 7.0, 8.0)); + __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_maskz_sqrt_pd @@ -36,6 +44,10 @@ __m512d test_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) return _mm512_maskz_sqrt_pd (__U,__A); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_sqrt_pd(0b00001111, + _mm512_set_pd(64.0, 49.0, 36.0, 25.0, 16.0, 9.0, 4.0, 1.0)), + 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0)); + __m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A) { // CHECK-LABEL: test_mm512_mask_sqrt_round_pd @@ -68,6 +80,8 @@ __m512 test_mm512_sqrt_ps(__m512 a) return _mm512_sqrt_ps(a); } +TEST_CONSTEXPR(match_m512(_mm512_sqrt_ps(_mm512_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f, 64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)); + __m512 test_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { // CHECK-LABEL: test_mm512_mask_sqrt_ps diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5282c7ab06dea..67cab0097b641 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -5514,48 +5514,72 @@ __m128d test_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_sqrt_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_mask_sqrt_pd(_mm_set_pd(200.0, 100.0), 0b01, _mm_set_pd(9.0, 4.0)), 2.0, 200.0)); + __m128d test_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_maskz_sqrt_pd // CHECK: @llvm.sqrt.v2f64 // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_sqrt_pd(__U,__A); } + +TEST_CONSTEXPR(match_m128d(_mm_maskz_sqrt_pd(0b10, _mm_set_pd(9.0, 4.0)), 0.0, 3.0)); + __m256d test_mm256_mask_sqrt_pd(__m256d __W, __mmask8 
__U, __m256d __A) { // CHECK-LABEL: test_mm256_mask_sqrt_pd // CHECK: @llvm.sqrt.v4f64 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_sqrt_pd(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_mask_sqrt_pd(_mm256_set_pd(400.0, 300.0, 200.0, 100.0), 0b1001, _mm256_set_pd(25.0, 16.0, 9.0, 4.0)), 2.0, 200.0, 300.0, 5.0)); + __m256d test_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: test_mm256_maskz_sqrt_pd // CHECK: @llvm.sqrt.v4f64 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_sqrt_pd(__U,__A); } + +TEST_CONSTEXPR(match_m256d(_mm256_maskz_sqrt_pd(0b0110, _mm256_set_pd(25.0, 16.0, 9.0, 4.0)), 0.0, 3.0, 4.0, 0.0)); + __m128 test_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_mask_sqrt_ps // CHECK: @llvm.sqrt.v4f32 // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_sqrt_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_mask_sqrt_ps(_mm_set_ps(400.0f, 300.0f, 200.0f, 100.0f), 0b1010, _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f)), 100.0f, 3.0f, 300.0f, 5.0f)); + __m128 test_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: test_mm_maskz_sqrt_ps // CHECK: @llvm.sqrt.v4f32 // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_sqrt_ps(__U,__A); } + +TEST_CONSTEXPR(match_m128(_mm_maskz_sqrt_ps(0b0011, _mm_set_ps(25.0f, 16.0f, 9.0f, 4.0f)), 2.0f, 3.0f, 0.0f, 0.0f)); + __m256 test_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: test_mm256_mask_sqrt_ps // CHECK: @llvm.sqrt.v8f32 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_sqrt_ps(__W,__U,__A); } + +TEST_CONSTEXPR(match_m256(_mm256_mask_sqrt_ps(_mm256_set_ps(800.0f, 700.0f, 600.0f, 500.0f, 400.0f, 300.0f, 200.0f, 100.0f), 0b11001100, _mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 100.0f, 200.0f, 
3.0f, 4.0f, 500.0f, 600.0f, 7.0f, 8.0f));
+
 __m256 test_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: test_mm256_maskz_sqrt_ps
   // CHECK: @llvm.sqrt.v8f32
   // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_sqrt_ps(__U,__A);
 }
+
+TEST_CONSTEXPR(match_m256(_mm256_maskz_sqrt_ps(0b11110000, _mm256_set_ps(64.0f, 49.0f, 36.0f, 25.0f, 16.0f, 9.0f, 4.0f, 1.0f)), 0.0f, 0.0f, 0.0f, 0.0f, 5.0f, 6.0f, 7.0f, 8.0f));
+
 __m128d test_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: test_mm_mask_sub_pd
   // CHECK: fsub <2 x double> %{{.*}}, %{{.*}}
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 3bad3426b1586..929dad963ce52 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -742,6 +742,8 @@ __m128 test_mm_sqrt_ps(__m128 x) {
   return _mm_sqrt_ps(x);
 }
 
+TEST_CONSTEXPR(match_m128(_mm_sqrt_ps(_mm_set_ps(16.0f, 9.0f, 4.0f, 1.0f)), 1.0f, 2.0f, 3.0f, 4.0f));
+
 __m128 test_mm_sqrt_ss(__m128 x) {
   // CHECK-LABEL: test_mm_sqrt_ss
   // CHECK: extractelement <4 x float> {{.*}}, i64 0
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 84b90c09444c2..2e009ba432207 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1423,6 +1423,8 @@ __m128d test_mm_sqrt_pd(__m128d A) {
   return _mm_sqrt_pd(A);
 }
 
+TEST_CONSTEXPR(match_m128d(_mm_sqrt_pd(_mm_set_pd(9.0, 4.0)), 2.0, 3.0));
+
 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_sqrt_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i64 0