Skip to content

Commit c9320bc

Browse files
committed
[X86] Use correctly sized floating-point literals in the _mm*_setzero_ps/pd intrinsics.
This avoids depending on int->float or double->float conversions, which improves codegen under #pragma STDC FENV_ACCESS ON. Ideally we would improve constant folding so that these conversions are folded away, but this was a cheap and easy improvement. Fixes PR59052.
1 parent dddf6ab commit c9320bc

File tree

5 files changed

+6
-62
lines changed

5 files changed

+6
-62
lines changed

clang/lib/Headers/avx512fintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,8 +256,8 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
256256
static __inline __m512 __DEFAULT_FN_ATTRS512
257257
_mm512_setzero_ps(void)
258258
{
259-
return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
260-
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
259+
return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
260+
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
261261
}
262262

263263
#define _mm512_setzero _mm512_setzero_ps

clang/lib/Headers/avxintrin.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4298,7 +4298,7 @@ _mm256_set1_epi64x(long long __q)
42984298
static __inline __m256d __DEFAULT_FN_ATTRS
42994299
_mm256_setzero_pd(void)
43004300
{
4301-
return __extension__ (__m256d){ 0, 0, 0, 0 };
4301+
return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
43024302
}
43034303

43044304
/// Constructs a 256-bit floating-point vector of [8 x float] with all
@@ -4312,7 +4312,7 @@ _mm256_setzero_pd(void)
43124312
static __inline __m256 __DEFAULT_FN_ATTRS
43134313
_mm256_setzero_ps(void)
43144314
{
4315-
return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
4315+
return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
43164316
}
43174317

43184318
/// Constructs a 256-bit integer vector initialized to zero.

clang/lib/Headers/emmintrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1819,7 +1819,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w,
18191819
/// \returns An initialized 128-bit floating-point vector of [2 x double] with
18201820
/// all elements set to zero.
18211821
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) {
1822-
return __extension__(__m128d){0, 0};
1822+
return __extension__(__m128d){0.0, 0.0};
18231823
}
18241824

18251825
/// Constructs a 128-bit floating-point vector of [2 x double]. The lower

clang/lib/Headers/xmmintrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1906,7 +1906,7 @@ _mm_setr_ps(float __z, float __y, float __x, float __w)
19061906
static __inline__ __m128 __DEFAULT_FN_ATTRS
19071907
_mm_setzero_ps(void)
19081908
{
1909-
return __extension__ (__m128){ 0, 0, 0, 0 };
1909+
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
19101910
}
19111911

19121912
/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a

clang/test/CodeGen/X86/avx512dq-builtins-constrained.c

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,6 @@ __m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
6767
__m256 test_mm512_cvtepi64_ps(__m512i __A) {
6868
// COMMON-LABEL: test_mm512_cvtepi64_ps
6969
// UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
70-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
71-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
72-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
73-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
74-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
75-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
76-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
77-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
7870
// CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
7971
// CHECK-ASM: vcvtqq2ps
8072
return _mm512_cvtepi64_ps(__A);
@@ -92,14 +84,6 @@ __m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
9284
__m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
9385
// COMMON-LABEL: test_mm512_maskz_cvtepi64_ps
9486
// UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x float>
95-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
96-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
97-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
98-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
99-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
100-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
101-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
102-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
10387
// CONSTRAINED: call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
10488
// COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
10589
// CHECK-ASM: vcvtqq2ps
@@ -108,14 +92,6 @@ __m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
10892

10993
__m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
11094
// COMMON-LABEL: test_mm512_cvt_roundepi64_ps
111-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
112-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
113-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
114-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
115-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
116-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
117-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
118-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
11995
// COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
12096
// CHECK-ASM: vcvtqq2ps
12197
return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
@@ -131,14 +107,6 @@ __m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A)
131107

132108
__m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
133109
// COMMON-LABEL: test_mm512_maskz_cvt_roundepi64_ps
134-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
135-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
136-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
137-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
138-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
139-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
140-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
141-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
142110
// COMMONIR: @llvm.x86.avx512.sitofp.round.v8f32.v8i64
143111
// COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
144112
// CHECK-ASM: vcvtqq2ps
@@ -197,14 +165,6 @@ __m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
197165
__m256 test_mm512_cvtepu64_ps(__m512i __A) {
198166
// COMMON-LABEL: test_mm512_cvtepu64_ps
199167
// UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
200-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
201-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
202-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
203-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
204-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
205-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
206-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
207-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
208168
// CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
209169
// CHECK-ASM: vcvtuqq2ps
210170
return _mm512_cvtepu64_ps(__A);
@@ -222,14 +182,6 @@ __m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
222182
__m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
223183
// COMMON-LABEL: test_mm512_maskz_cvtepu64_ps
224184
// UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x float>
225-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
226-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
227-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
228-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
229-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
230-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
231-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
232-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
233185
// CONSTRAINED: call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
234186
// COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
235187
// CHECK-ASM: vcvtuqq2ps
@@ -253,14 +205,6 @@ __m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A)
253205

254206
__m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
255207
// COMMON-LABEL: test_mm512_maskz_cvt_roundepu64_ps
256-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
257-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
258-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
259-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
260-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
261-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
262-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
263-
// CONSTRAINED: call float @llvm.experimental.constrained.sitofp.f32.i32(i32 0, metadata !"round.tonearest", metadata !"fpexcept.strict")
264208
// COMMONIR: @llvm.x86.avx512.uitofp.round.v8f32.v8i64
265209
// COMMONIR: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
266210
// CHECK-ASM: vcvtuqq2ps

0 commit comments

Comments
 (0)