Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -866,16 +866,6 @@ let Features = "sha", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def sha256msg2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
}

let Features = "fma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vfmaddss3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def vfmaddsd3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
}

let Features = "fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vfmaddss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def vfmaddsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
}

let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vfmaddsubps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
def vfmaddsubpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
Expand Down
6 changes: 0 additions & 6 deletions clang/lib/CodeGen/TargetBuiltins/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1028,16 +1028,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);

case X86::BI__builtin_ia32_vfmaddss3:
case X86::BI__builtin_ia32_vfmaddsd3:
case X86::BI__builtin_ia32_vfmaddsh3_mask:
case X86::BI__builtin_ia32_vfmaddss3_mask:
case X86::BI__builtin_ia32_vfmaddsd3_mask:
return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
case X86::BI__builtin_ia32_vfmaddss:
case X86::BI__builtin_ia32_vfmaddsd:
return EmitScalarFMAExpr(*this, E, Ops,
Constant::getNullValue(Ops[0]->getType()));
case X86::BI__builtin_ia32_vfmaddsh3_maskz:
case X86::BI__builtin_ia32_vfmaddss3_maskz:
case X86::BI__builtin_ia32_vfmaddsd3_maskz:
Expand Down
56 changes: 24 additions & 32 deletions clang/lib/Headers/fma4intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,14 @@ _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
(__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], __C[0]));
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], __C[0]));
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
Expand All @@ -64,16 +62,14 @@ _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
-(__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
return _mm_set_ss(__builtin_elementwise_fma(__A[0], __B[0], -__C[0]));
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
return _mm_set_sd(__builtin_elementwise_fma(__A[0], __B[0], -__C[0]));
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
Expand All @@ -88,16 +84,14 @@ _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
(__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], __C[0]));
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], __C[0]));
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
Expand All @@ -112,16 +106,14 @@ _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
-(__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
return _mm_set_ss(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0]));
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
return _mm_set_sd(__builtin_elementwise_fma(-__A[0], __B[0], -__C[0]));
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
Expand Down
64 changes: 32 additions & 32 deletions clang/lib/Headers/fmaintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
/// 32 bits.
/// \returns A 128-bit vector of [4 x float] containing the result in the low
/// 32 bits and a copy of \a __A[127:32] in the upper 96 bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
__A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]);
return __A;
}

/// Computes a scalar multiply-add of the double-precision values in the
Expand All @@ -124,10 +124,10 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
/// 64 bits.
/// \returns A 128-bit vector of [2 x double] containing the result in the low
/// 64 bits and a copy of \a __A[127:64] in the upper 64 bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
__A[0] = __builtin_elementwise_fma(__A[0], __B[0], __C[0]);
return __A;
}

/// Computes a multiply-subtract of 128-bit vectors of [4 x float].
Expand Down Expand Up @@ -195,10 +195,10 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
/// 32 bits.
/// \returns A 128-bit vector of [4 x float] containing the result in the low
/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]);
return __A;
}

/// Computes a scalar multiply-subtract of the double-precision values in
Expand All @@ -224,10 +224,10 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
/// 64 bits.
/// \returns A 128-bit vector of [2 x double] containing the result in the low
/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__A[0] = __builtin_elementwise_fma(__A[0], __B[0], -__C[0]);
return __A;
}

/// Computes a negated multiply-add of 128-bit vectors of [4 x float].
Expand Down Expand Up @@ -295,10 +295,10 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
/// 32 bits.
/// \returns A 128-bit vector of [4 x float] containing the result in the low
/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
__A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]);
return __A;
}

/// Computes a scalar negated multiply-add of the double-precision values
Expand All @@ -324,10 +324,10 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
/// 64 bits.
/// \returns A 128-bit vector of [2 x double] containing the result in the low
/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
__A[0] = __builtin_elementwise_fma(__A[0], -__B[0], __C[0]);
return __A;
}

/// Computes a negated multiply-subtract of 128-bit vectors of [4 x float].
Expand Down Expand Up @@ -395,10 +395,10 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
/// 32 bits.
/// \returns A 128-bit vector of [4 x float] containing the result in the low
/// 32 bits, and a copy of \a __A[127:32] in the upper 96 bits.
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
__A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]);
return __A;
}

/// Computes a scalar negated multiply-subtract of the double-precision
Expand All @@ -424,10 +424,10 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
/// 64 bits.
/// \returns A 128-bit vector of [2 x double] containing the result in the low
/// 64 bits, and a copy of \a __A[127:64] in the upper 64 bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
__A[0] = __builtin_elementwise_fma(__A[0], -__B[0], -__C[0]);
return __A;
}

/// Computes a multiply with alternating add/subtract of 128-bit vectors of
Expand Down
Loading
Loading