diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index cb08e2107f072..14c7d636ad51e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -866,16 +866,6 @@ let Features = "sha", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def sha256msg2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; } -let Features = "fma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vfmaddss3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; - def vfmaddsd3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; -} - -let Features = "fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vfmaddss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; - def vfmaddsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; -} - let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vfmaddsubps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">; def vfmaddsubpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">; diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index 2381b2e7cf2cf..00c8a1cf16e31 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -1028,16 +1028,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false); - case X86::BI__builtin_ia32_vfmaddss3: - case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddsh3_mask: case X86::BI__builtin_ia32_vfmaddss3_mask: case X86::BI__builtin_ia32_vfmaddsd3_mask: return EmitScalarFMAExpr(*this, E, Ops, Ops[0]); - case X86::BI__builtin_ia32_vfmaddss: - case X86::BI__builtin_ia32_vfmaddsd: - return EmitScalarFMAExpr(*this, E, Ops, - Constant::getNullValue(Ops[0]->getType())); case X86::BI__builtin_ia32_vfmaddsh3_maskz: case X86::BI__builtin_ia32_vfmaddss3_maskz: case X86::BI__builtin_ia32_vfmaddsd3_maskz: diff --git a/clang/lib/Headers/fma4intrin.h b/clang/lib/Headers/fma4intrin.h index e0a0e4c968950..20b8030b77adc 100644 --- a/clang/lib/Headers/fma4intrin.h +++ b/clang/lib/Headers/fma4intrin.h @@ -40,16 +40,14 @@ _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], __C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], __C[0])); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR @@ -64,16 +62,14 @@ _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], -__C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], -__C[0])); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR @@ -88,16 +84,14 @@ _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], __C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], __C[0])); } static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR @@ -112,16 +106,14 @@ _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { + return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0])); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { + return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0])); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h index c51009079f8d5..eba527f3604d0 100644 --- a/clang/lib/Headers/fmaintrin.h +++ b/clang/lib/Headers/fmaintrin.h @@ -95,10 +95,10 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]); + return __A; } /// Computes a scalar multiply-add of the double-precision values in the @@ -124,10 +124,10 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]); + return __A; } /// Computes a multiply-subtract of 128-bit vectors of [4 x float]. @@ -195,10 +195,10 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]); + return __A; } /// Computes a scalar multiply-subtract of the double-precision values in @@ -224,10 +224,10 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]); + return __A; } /// Computes a negated multiply-add of 128-bit vectors of [4 x float]. @@ -295,10 +295,10 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]); + return __A; } /// Computes a scalar negated multiply-add of the double-precision values @@ -324,10 +324,10 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]); + return __A; } /// Computes a negated multiply-subtract of 128-bit vectors of [4 x float]. @@ -395,10 +395,10 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) /// 32 bits. /// \returns A 128-bit vector of [4 x float] containing the result in the low /// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) -{ - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]); + return __A; } /// Computes a scalar negated multiply-subtract of the double-precision @@ -424,10 +424,10 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) /// 64 bits. /// \returns A 128-bit vector of [2 x double] containing the result in the low /// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) -{ - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { + __A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]); + return __A; } /// Computes a multiply with alternating add/subtract of 128-bit vectors of diff --git a/clang/test/CodeGen/X86/fma-builtins.c b/clang/test/CodeGen/X86/fma-builtins.c index 5445e50d4ecea..ea93bca2bad65 100644 --- a/clang/test/CodeGen/X86/fma-builtins.c +++ b/clang/test/CodeGen/X86/fma-builtins.c @@ -28,23 +28,25 @@ TEST_CONSTEXPR(match_m128d(_mm_fmadd_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0, __m128 test_mm_fmadd_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmadd_ss - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_fmadd_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_fmadd_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -7.0f, 1.0f, -2.0f, -0.0f)); __m128d test_mm_fmadd_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmadd_sd - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}) - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_fmadd_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_fmadd_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -12.0, 1.0)); __m128 test_mm_fmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmsub_ps @@ -64,25 +66,27 @@ TEST_CONSTEXPR(match_m128d(_mm_fmsub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0, __m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmsub_ss - // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg float %{{.+}} + // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[NEG]]) + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_fmsub_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_fmsub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -9.0f, 1.0f, -2.0f, -0.0f)); __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmsub_sd - // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}) - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg double %{{.+}} + // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[NEG]]) + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_fmsub_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_fmsub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 4.0, 1.0)); __m128 test_mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmadd_ps @@ -102,25 +106,27 @@ TEST_CONSTEXPR(match_m128d(_mm_fnmadd_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0 __m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmadd_ss - // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg float %{{.+}} + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: call float @llvm.fma.f32(float %{{.*}}, float [[NEG]], float %{{.*}}) + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_fnmadd_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_fnmadd_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 9.0f, 1.0f, -2.0f, -0.0f)); __m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fnmadd_sd - // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}) - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg double %{{.+}} + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: call double @llvm.fma.f64(double %{{.*}}, double [[NEG]], double %{{.*}}) + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_fnmadd_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_fnmadd_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -4.0, 1.0)); __m128 test_mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmsub_ps @@ -142,27 +148,29 @@ TEST_CONSTEXPR(match_m128d(_mm_fnmsub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0 __m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmsub_ss - // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) - // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg float %{{.+}} + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG2:%.+]] = fneg float %{{.+}} + // CHECK: call float @llvm.fma.f32(float %{{.*}}, float [[NEG]], float [[NEG2]]) + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_fnmsub_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_fnmsub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 7.0f, 1.0f, -2.0f, -0.0f)); __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fnmsub_sd - // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}) - // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg double %{{.+}} + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG2:%.+]] = fneg double %{{.+}} + // CHECK: call double @llvm.fma.f64(double %{{.*}}, double [[NEG]], double [[NEG2]]) + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_fnmsub_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_fnmsub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 12.0, 1.0)); __m128 test_mm_fmaddsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmaddsub_ps diff --git a/clang/test/CodeGen/X86/fma4-builtins.c b/clang/test/CodeGen/X86/fma4-builtins.c index fb449d5da2591..949519864512b 100644 --- a/clang/test/CodeGen/X86/fma4-builtins.c +++ b/clang/test/CodeGen/X86/fma4-builtins.c @@ -28,23 +28,29 @@ TEST_CONSTEXPR(match_m128d(_mm_macc_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0, __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_macc_ss - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}) - // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 + // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3 return _mm_macc_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_macc_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -7.0f, 0.0f, 0.0f, 0.0f)); __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_macc_sd - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}) - // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 + // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 return _mm_macc_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_macc_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -12.0, 0.0)); __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msub_ps @@ -64,25 +70,31 @@ TEST_CONSTEXPR(match_m128d(_mm_msub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0, __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msub_ss - // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]]) - // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg float %{{.+}} + // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[NEG]]) + // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3 return _mm_msub_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_msub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), -9.0f, 0.0f, 0.0f, 0.0f)); __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_msub_sd - // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]]) - // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg double %{{.+}} + // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[NEG]]) + // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 return _mm_msub_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_msub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 4.0, 0.0)); __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmacc_ps @@ -102,25 +114,31 @@ TEST_CONSTEXPR(match_m128d(_mm_nmacc_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0, __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmacc_ss - // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}}) - // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg float %{{.+}} + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: call float @llvm.fma.f32(float [[NEG]], float %{{.*}}, float %{{.*}}) + // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3 return _mm_nmacc_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_nmacc_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 9.0f, 0.0f, 0.0f, 0.0f)); __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmacc_sd - // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}}) - // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg double %{{.+}} + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: call double @llvm.fma.f64(double [[NEG]], double %{{.*}}, double %{{.*}}) + // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 return _mm_nmacc_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_nmacc_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), -4.0, 0.0)); __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmsub_ps @@ -142,27 +160,33 @@ TEST_CONSTEXPR(match_m128d(_mm_nmsub_pd((__m128d){ 0.0, -4.0 }, (__m128d){ -0.0, __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmsub_ss - // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}} - // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0 - // CHECK: extractelement <4 x float> %{{.*}}, i64 0 - // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0 - // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]]) - // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg float %{{.+}} + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: [[NEG2:%.+]] = fneg float %{{.+}} + // CHECK: call float @llvm.fma.f32(float [[NEG]], float %{{.*}}, float [[NEG2]]) + // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2 + // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3 return _mm_nmsub_ss(a, b, c); } +TEST_CONSTEXPR(match_m128(_mm_nmsub_ss((__m128){ -4.0f, 1.0f, -2.0f, -0.0f }, (__m128){ 2.0f, 4.0f, 2.0f, -0.0f }, (__m128){ 1.0f, -4.0f, 2.0f, 1.0f }), 7.0f, 0.0f, 0.0f, 0.0f)); __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmsub_sd - // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}} - // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0 - // CHECK: extractelement <2 x double> %{{.*}}, i64 0 - // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0 - // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]]) - // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG:%.+]] = fneg double %{{.+}} + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: [[NEG2:%.+]] = fneg double %{{.+}} + // CHECK: call double @llvm.fma.f64(double [[NEG]], double %{{.*}}, double [[NEG2]]) + // CHECK: insertelement <2 x double> poison, double %{{.*}}, i32 0 + // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 return _mm_nmsub_sd(a, b, c); } +TEST_CONSTEXPR(match_m128d(_mm_nmsub_sd((__m128d){ -4.0, 1.0 }, (__m128d){ 1.0, 2.0 }, (__m128d){ -8.0, 3.0 }), 12.0, 0.0)); __m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_maddsub_ps