Skip to content

Commit f6ac261

Browse files
committed
[Clang][X86] Replace some __builtin_ia32_vfmadd* with __builtin_elementwise_fma
The following intrinsics were replaced by `__builtin_elementwise_fma`:

- `__builtin_ia32_vfmaddps(256)`
- `__builtin_ia32_vfmaddpd(256)`
- `__builtin_ia32_vfmaddph(256)`
- `__builtin_ia32_vfmaddbf16(128 | 256 | 512)`

All of these `__builtin_ia32_vfmadd*` intrinsics are lowered to `__builtin_elementwise_fma`, so keeping them is an unnecessary indirection.
1 parent 44aedac commit f6ac261

File tree

9 files changed

+125
-158
lines changed

9 files changed

+125
-158
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -878,11 +878,6 @@ let Features = "sha", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
878878
def sha256msg2 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
879879
}
880880

881-
let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
882-
def vfmaddps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
883-
def vfmaddpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
884-
}
885-
886881
let Features = "fma", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
887882
def vfmaddss3 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
888883
def vfmaddsd3 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
@@ -899,8 +894,6 @@ let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<128
899894
}
900895

901896
let Features = "fma|fma4", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
902-
def vfmaddps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
903-
def vfmaddpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
904897
def vfmaddsubps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
905898
def vfmaddsubpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
906899
}
@@ -4140,14 +4133,6 @@ let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVecto
41404133
def vcvtps2phx512_mask : X86Builtin<"_Vector<16, _Float16>(_Vector<16, float>, _Vector<16, _Float16>, unsigned short, _Constant int)">;
41414134
}
41424135

4143-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
4144-
def vfmaddph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>)">;
4145-
}
4146-
4147-
let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
4148-
def vfmaddph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>, _Vector<16, _Float16>, _Vector<16, _Float16>)">;
4149-
}
4150-
41514136
let Features = "avx512fp16,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
41524137
def vfmaddph512_mask : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
41534138
def vfmaddph512_mask3 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Vector<32, _Float16>, _Vector<32, _Float16>, unsigned int, _Constant int)">;
@@ -5373,13 +5358,4 @@ let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<
53735358

53745359
let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
53755360
def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
5376-
def vfmaddbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
5377-
}
5378-
5379-
let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
5380-
def vfmaddbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
5381-
}
5382-
5383-
let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
5384-
def vfmaddbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
53855361
}

clang/lib/CodeGen/TargetBuiltins/X86.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,18 +1051,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
10511051
case X86::BI__builtin_ia32_vfmsubsd3_mask3:
10521052
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
10531053
/*NegAcc*/ true);
1054-
case X86::BI__builtin_ia32_vfmaddph:
1055-
case X86::BI__builtin_ia32_vfmaddps:
1056-
case X86::BI__builtin_ia32_vfmaddpd:
1057-
case X86::BI__builtin_ia32_vfmaddph256:
1058-
case X86::BI__builtin_ia32_vfmaddps256:
1059-
case X86::BI__builtin_ia32_vfmaddpd256:
10601054
case X86::BI__builtin_ia32_vfmaddph512_mask:
10611055
case X86::BI__builtin_ia32_vfmaddph512_maskz:
10621056
case X86::BI__builtin_ia32_vfmaddph512_mask3:
1063-
case X86::BI__builtin_ia32_vfmaddbf16128:
1064-
case X86::BI__builtin_ia32_vfmaddbf16256:
1065-
case X86::BI__builtin_ia32_vfmaddbf16512:
10661057
case X86::BI__builtin_ia32_vfmaddps512_mask:
10671058
case X86::BI__builtin_ia32_vfmaddps512_maskz:
10681059
case X86::BI__builtin_ia32_vfmaddps512_mask3:

clang/lib/Headers/avx10_2_512bf16intrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
441441

442442
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
443443
_mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
444-
return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
444+
return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B,
445445
(__v32bf)__C);
446446
}
447447

@@ -469,7 +469,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
469469

470470
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
471471
_mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
472-
return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
472+
return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, (__v32bf)__B,
473473
-(__v32bf)__C);
474474
}
475475

@@ -497,7 +497,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
497497

498498
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
499499
_mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
500-
return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
500+
return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B,
501501
(__v32bf)__C);
502502
}
503503

@@ -527,7 +527,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
527527

528528
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
529529
_mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
530-
return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
530+
return (__m512bh)__builtin_elementwise_fma((__v32bf)__A, -(__v32bf)__B,
531531
-(__v32bf)__C);
532532
}
533533

clang/lib/Headers/avx10_2bf16intrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,7 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
852852

853853
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
854854
_mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
855-
return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
855+
return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B,
856856
(__v16bf)__C);
857857
}
858858

@@ -880,7 +880,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh(
880880

881881
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
882882
_mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
883-
return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
883+
return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, (__v16bf)__B,
884884
-(__v16bf)__C);
885885
}
886886

@@ -908,7 +908,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh(
908908

909909
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
910910
_mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
911-
return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
911+
return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B,
912912
(__v16bf)__C);
913913
}
914914

@@ -938,7 +938,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh(
938938

939939
static __inline__ __m256bh __DEFAULT_FN_ATTRS256
940940
_mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
941-
return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
941+
return (__m256bh)__builtin_elementwise_fma((__v16bf)__A, -(__v16bf)__B,
942942
-(__v16bf)__C);
943943
}
944944

@@ -969,7 +969,7 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh(
969969
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A,
970970
__m128bh __B,
971971
__m128bh __C) {
972-
return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
972+
return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B,
973973
(__v8bf)__C);
974974
}
975975

@@ -997,7 +997,7 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
997997
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A,
998998
__m128bh __B,
999999
__m128bh __C) {
1000-
return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
1000+
return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, (__v8bf)__B,
10011001
-(__v8bf)__C);
10021002
}
10031003

@@ -1025,7 +1025,7 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
10251025
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A,
10261026
__m128bh __B,
10271027
__m128bh __C) {
1028-
return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
1028+
return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B,
10291029
(__v8bf)__C);
10301030
}
10311031

@@ -1053,7 +1053,7 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
10531053
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A,
10541054
__m128bh __B,
10551055
__m128bh __C) {
1056-
return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
1056+
return (__m128bh)__builtin_elementwise_fma((__v8bf)__A, -(__v8bf)__B,
10571057
-(__v8bf)__C);
10581058
}
10591059

0 commit comments

Comments
 (0)