Skip to content

Commit ea2135f

Browse files
committed
[Clang][X86] Replace F16C vcvtph2ps/256 intrinsics with __builtin_convertvector
The following intrinsics were replaced by a combination of `__builtin_shufflevector` and `__builtin_convertvector`: `__builtin_ia32_vcvtph2ps` and `__builtin_ia32_vcvtph2ps256`. Fixes #152749.
1 parent 10e146a commit ea2135f

File tree

6 files changed

+14
-17
lines changed

6 files changed

+14
-17
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -757,14 +757,6 @@ let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
757757
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
758758
}
759759

760-
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
761-
def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">;
762-
}
763-
764-
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
765-
def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">;
766-
}
767-
768760
let Features = "rdrnd", Attributes = [NoThrow] in {
769761
def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">;
770762
def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">;

clang/lib/CodeGen/TargetBuiltins/X86.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,8 +2841,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
28412841
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
28422842

28432843
// f16c half2float intrinsics
2844-
case X86::BI__builtin_ia32_vcvtph2ps:
2845-
case X86::BI__builtin_ia32_vcvtph2ps256:
28462844
case X86::BI__builtin_ia32_vcvtph2ps_mask:
28472845
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
28482846
case X86::BI__builtin_ia32_vcvtph2ps512_mask: {

clang/lib/Headers/emmintrin.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,17 @@ typedef signed char __v16qs __attribute__((__vector_size__(16)));
4040

4141
#ifdef __SSE2__
4242
/* Both _Float16 and __bf16 require SSE2 being enabled. */
43+
typedef _Float16 __v4hf __attribute__((__vector_size__(8)));
4344
typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
4445
typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
4546
typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
4647

4748
typedef __bf16 __v8bf __attribute__((__vector_size__(16), __aligned__(16)));
4849
typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
50+
#else
51+
/* Use __fp16 when _Float16 is not supported. */
52+
typedef __fp16 __v4hf __attribute__((__vector_size__(8)));
53+
typedef __fp16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
4954
#endif
5055

5156
/* Define the default attributes for the functions in this file. */

clang/lib/Headers/f16cintrin.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ static __inline float __DEFAULT_FN_ATTRS128
3939
_cvtsh_ss(unsigned short __a)
4040
{
4141
__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
42-
__v4sf __r = __builtin_ia32_vcvtph2ps(__v);
42+
__v4hi __w = __builtin_shufflevector(__v, __v, 0, 1, 2, 3);
43+
__v4sf __r = __builtin_convertvector((__v4hf)__w, __v4sf);
4344
return __r[0];
4445
}
4546

@@ -109,7 +110,8 @@ _cvtsh_ss(unsigned short __a)
109110
static __inline __m128 __DEFAULT_FN_ATTRS128
110111
_mm_cvtph_ps(__m128i __a)
111112
{
112-
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
113+
__v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
114+
return __builtin_convertvector((__v4hf)__v, __v4sf);
113115
}
114116

115117
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
@@ -153,7 +155,7 @@ _mm_cvtph_ps(__m128i __a)
153155
static __inline __m256 __DEFAULT_FN_ATTRS256
154156
_mm256_cvtph_ps(__m128i __a)
155157
{
156-
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
158+
return __builtin_convertvector((__v8hf)__a, __v8sf);
157159
}
158160

159161
#undef __DEFAULT_FN_ATTRS128

clang/test/CodeGen/X86/f16c-builtins-constrained.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) {
1616
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
1717
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
1818
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
19-
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2020
// CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
2121
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
2222
return _cvtsh_ss(a);
@@ -38,7 +38,7 @@ unsigned short test_cvtss_sh(float a) {
3838

3939
__m128 test_mm_cvtph_ps(__m128i a) {
4040
// CHECK-LABEL: test_mm_cvtph_ps
41-
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
41+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4242
// CHECK: call {{.*}}<4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
4343
return _mm_cvtph_ps(a);
4444
}

clang/test/CodeGen/X86/f16c-builtins.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ float test_cvtsh_ss(unsigned short a) {
1616
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
1717
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
1818
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
19-
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2020
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
2121
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
2222
return _cvtsh_ss(a);
@@ -35,7 +35,7 @@ unsigned short test_cvtss_sh(float a) {
3535

3636
__m128 test_mm_cvtph_ps(__m128i a) {
3737
// CHECK-LABEL: test_mm_cvtph_ps
38-
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
38+
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3939
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
4040
return _mm_cvtph_ps(a);
4141
}

0 commit comments

Comments
 (0)