Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -757,14 +757,6 @@ let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
}

// F16C half-to-float builtin at a required vector width of 128: takes a
// vector of 8 shorts and produces a vector of 4 floats (NoThrow, Const).
// NOTE(review): the header for this file reports 0 additions and 8 deletions,
// so this definition appears to be one of the removed lines - confirm.
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vcvtph2ps : X86Builtin<"_Vector<4, float>(_Vector<8, short>)">;
}

// F16C half-to-float builtin at a required vector width of 256: takes a
// vector of 8 shorts and produces a vector of 8 floats (NoThrow, Const).
// NOTE(review): the header for this file reports 0 additions and 8 deletions,
// so this definition appears to be one of the removed lines - confirm.
let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vcvtph2ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, short>)">;
}

let Features = "rdrnd", Attributes = [NoThrow] in {
def rdrand16_step : X86Builtin<"unsigned int(unsigned short *)">;
def rdrand32_step : X86Builtin<"unsigned int(unsigned int *)">;
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/CodeGen/TargetBuiltins/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2841,8 +2841,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);

// f16c half2float intrinsics
case X86::BI__builtin_ia32_vcvtph2ps:
case X86::BI__builtin_ia32_vcvtph2ps256:
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
Expand Down
13 changes: 8 additions & 5 deletions clang/lib/Headers/f16cintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@
static __inline float __DEFAULT_FN_ATTRS128
_cvtsh_ss(unsigned short __a)
{
// NOTE(review): two alternative bodies are shown back to back here. The hunk
// header (-38,9 +38,7) is consistent with the next three statements being the
// removed lines and the final return being the added one - confirm.
//
// Vector form: put __a in element 0 of an 8 x short vector (other elements
// zero), convert through the vcvtph2ps builtin, and return element 0.
__v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
__v4sf __r = __builtin_ia32_vcvtph2ps(__v);
return __r[0];
// Scalar form: reinterpret the 16 bits of __a as __fp16, then cast to float.
return (float)__builtin_bit_cast(__fp16, __a);
}

/// Converts a 32-bit single-precision float value to a 16-bit
Expand Down Expand Up @@ -109,7 +107,10 @@ _cvtsh_ss(unsigned short __a)
static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_cvtph_ps(__m128i __a)
{
// NOTE(review): the hunk header (-109,7 +107,10) is consistent with this
// builtin-based return being the removed line and the statements after it
// being the added ones - confirm.
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
// Local vector type: __fp16 elements in an 8-byte vector.
typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));

// View __a as 8 x short, keep elements 0..3, reinterpret them as __fp16 and
// convert element-wise to a __v4sf.
__v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
return __builtin_convertvector((__v4fp16)__v, __v4sf);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// Review-comment text: the same conversion as the return above, with an
// explicit (__m128) cast applied to the result.
return (__m128)__builtin_convertvector((__v4fp16)__v, __v4sf);

}

/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
Expand Down Expand Up @@ -153,7 +154,9 @@ _mm_cvtph_ps(__m128i __a)
static __inline __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtph_ps(__m128i __a)
{
// NOTE(review): the hunk header (-153,7 +154,9) is consistent with this
// builtin-based return being the removed line and the statements after it
// being the added ones - confirm.
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
// Local vector type: __fp16 elements in a 16-byte, 16-byte-aligned vector.
typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));

// Reinterpret __a as __fp16 elements and convert element-wise to a __v8sf.
return __builtin_convertvector((__v8fp16)__a, __v8sf);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// Review-comment text: the same conversion as the return above, with an
// explicit (__m256) cast applied to the result.
return (__m256)__builtin_convertvector((__v8fp16)__a, __v8sf);

}

#undef __DEFAULT_FN_ATTRS128
Expand Down
15 changes: 3 additions & 12 deletions clang/test/CodeGen/X86/f16c-builtins-constrained.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,8 @@

// Codegen test for _cvtsh_ss with constrained floating-point intrinsics.
// NOTE(review): two sets of expectations are listed one after the other. The
// hunk header (-8,17 +8,8) is consistent with the insertelement /
// shufflevector / vector fpext / extractelement lines being removed and the
// final scalar fpext + ret pair being added - confirm.
float test_cvtsh_ss(unsigned short a) {
// CHECK-LABEL: test_cvtsh_ss
// CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
// CHECK: [[CONV:%.*]] = call {{.*}}float @llvm.experimental.constrained.fpext.f32.f16(half %{{.*}}, metadata !"fpexcept.strict")
// CHECK: ret float [[CONV]]
return _cvtsh_ss(a);
}

Expand All @@ -38,7 +29,7 @@ unsigned short test_cvtss_sh(float a) {

// Codegen test for _mm_cvtph_ps with constrained floating-point intrinsics:
// expects a shuffle selecting elements 0..3 followed by a constrained fpext
// from <4 x half> to <4 x float>.
// NOTE(review): the two shufflevector expectations differ only in the second
// operand (poison vs. any value). The hunk header (-38,7 +29,7) is consistent
// with the first being removed and the second added - confirm.
__m128 test_mm_cvtph_ps(__m128i a) {
// CHECK-LABEL: test_mm_cvtph_ps
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: call {{.*}}<4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %{{.*}}, metadata !"fpexcept.strict")
return _mm_cvtph_ps(a);
}
Expand Down
15 changes: 3 additions & 12 deletions clang/test/CodeGen/X86/f16c-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,8 @@

// Codegen test for _cvtsh_ss.
// NOTE(review): two sets of expectations are listed one after the other. The
// hunk header (-8,17 +8,8) is consistent with the insertelement /
// shufflevector / vector fpext / extractelement lines being removed and the
// final scalar fpext + ret pair being added - confirm.
float test_cvtsh_ss(unsigned short a) {
// CHECK-LABEL: test_cvtsh_ss
// CHECK: insertelement <8 x i16> poison, i16 %{{.*}}, i32 0
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
// CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
// CHECK: [[CONV:%.*]] = fpext half %{{.*}} to float
// CHECK: ret float [[CONV]]
return _cvtsh_ss(a);
}

Expand All @@ -35,7 +26,7 @@ unsigned short test_cvtss_sh(float a) {

// Codegen test for _mm_cvtph_ps: expects a shuffle selecting elements 0..3
// followed by an fpext from <4 x half> to <4 x float>.
// NOTE(review): the two shufflevector expectations differ only in the second
// operand (poison vs. any value). The hunk header (-35,7 +26,7) is consistent
// with the first being removed and the second added - confirm.
__m128 test_mm_cvtph_ps(__m128i a) {
// CHECK-LABEL: test_mm_cvtph_ps
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: fpext <4 x half> %{{.*}} to <4 x float>
return _mm_cvtph_ps(a);
}
Expand Down