Skip to content

Commit a10a8bb

Browse files
authored
[Headers][X86] Enable constexpr handling for MMX/SSE sitofp/uitofp helper cvt intrinsics (llvm#153017)
1 parent 585f27c commit a10a8bb

File tree

2 files changed

+33
-7
lines changed

2 files changed

+33
-7
lines changed

clang/lib/Headers/xmmintrin.h

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1689,7 +1689,7 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) {
16891689
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
16901690
/// converted value of the second operand. The upper 64 bits are copied from
16911691
/// the upper 64 bits of the first operand.
1692-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
1692+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
16931693
_mm_cvtpi32_ps(__m128 __a, __m64 __b)
16941694
{
16951695
return (__m128)__builtin_shufflevector(
@@ -1715,7 +1715,7 @@ _mm_cvtpi32_ps(__m128 __a, __m64 __b)
17151715
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
17161716
/// converted value from the second operand. The upper 64 bits are copied
17171717
/// from the upper 64 bits of the first operand.
1718-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
1718+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
17191719
_mm_cvt_pi2ps(__m128 __a, __m64 __b)
17201720
{
17211721
return _mm_cvtpi32_ps(__a, __b);
@@ -2874,7 +2874,7 @@ _mm_movelh_ps(__m128 __a, __m128 __b) {
28742874
/// from the corresponding elements in this operand.
28752875
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
28762876
/// values from the operand.
2877-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
2877+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
28782878
_mm_cvtpi16_ps(__m64 __a)
28792879
{
28802880
return __builtin_convertvector((__v4hi)__a, __v4sf);
@@ -2892,7 +2892,7 @@ _mm_cvtpi16_ps(__m64 __a)
28922892
/// destination are copied from the corresponding elements in this operand.
28932893
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
28942894
/// values from the operand.
2895-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
2895+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
28962896
_mm_cvtpu16_ps(__m64 __a)
28972897
{
28982898
return __builtin_convertvector((__v4hu)__a, __v4sf);
@@ -2910,7 +2910,7 @@ _mm_cvtpu16_ps(__m64 __a)
29102910
/// from the corresponding lower 4 elements in this operand.
29112911
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
29122912
/// values from the operand.
2913-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
2913+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
29142914
_mm_cvtpi8_ps(__m64 __a)
29152915
{
29162916
return __builtin_convertvector(
@@ -2931,7 +2931,7 @@ _mm_cvtpi8_ps(__m64 __a)
29312931
/// operand.
29322932
/// \returns A 128-bit vector of [4 x float] containing the copied and converted
29332933
/// values from the source operand.
2934-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
2934+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
29352935
_mm_cvtpu8_ps(__m64 __a)
29362936
{
29372937
return __builtin_convertvector(
@@ -2955,7 +2955,7 @@ _mm_cvtpu8_ps(__m64 __a)
29552955
/// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the
29562956
/// copied and converted values from the first operand. The upper 64 bits
29572957
/// contain the copied and converted values from the second operand.
2958-
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2
2958+
static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
29592959
_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
29602960
{
29612961
return __builtin_convertvector(

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,7 @@ __m128 test_mm_cvt_pi2ps(__m128 a, __m64 b) {
167167
// CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
168168
return _mm_cvt_pi2ps(a, b);
169169
}
170+
TEST_CONSTEXPR(match_m128(_mm_cvt_pi2ps((__m128){-5.0f, +7.0f, -9.0f, +11.0f}, (__m64)(__v2si){-2,+4}), -2.0f, +4.0f, -9.0f, +11.0f));
170171

171172
__m64 test_mm_cvt_ps2pi(__m128 a) {
172173
// CHECK-LABEL: test_mm_cvt_ps2pi
@@ -180,29 +181,40 @@ __m64 test_mm_cvtpd_pi32(__m128d a) {
180181
return _mm_cvtpd_pi32(a);
181182
}
182183

184+
// Codegen test for _mm_cvtpi8_ps: forwards its __m64 argument to the
// intrinsic. The FileCheck directive below expects a signed integer-to-float
// conversion from <4 x i8> to <4 x float>.
__m128 test_mm_cvtpi8_ps(__m64 a) {
185+
// CHECK-LABEL: test_mm_cvtpi8_ps
186+
// CHECK: sitofp <4 x i8> {{%.*}} to <4 x float>
187+
return _mm_cvtpi8_ps(a);
188+
}
189+
// Constant-evaluation check: the input holds eight bytes, but the expected
// result lists only the first four (1..4) as floats.
TEST_CONSTEXPR(match_m128(_mm_cvtpi8_ps((__m64)(__v8qi){1, 2, 3, 4, 5, 6, 7, 8}), +1.0f, +2.0f, +3.0f, +4.0f));
190+
183191
// Codegen test for _mm_cvtpi16_ps: forwards its __m64 argument to the
// intrinsic. The FileCheck directive below expects a signed integer-to-float
// conversion from <4 x i16> to <4 x float>.
__m128 test_mm_cvtpi16_ps(__m64 a) {
184192
// CHECK-LABEL: test_mm_cvtpi16_ps
185193
// CHECK: sitofp <4 x i16> {{%.*}} to <4 x float>
186194
return _mm_cvtpi16_ps(a);
187195
}
196+
TEST_CONSTEXPR(match_m128(_mm_cvtpi16_ps((__m64)(__v4hi){-3, +9, -8, +256}), -3.0f, +9.0f, -8.0f, +256.0f));
188197

189198
// Codegen test for _mm_cvtpi32_pd: forwards its __m64 argument to the
// intrinsic. The FileCheck directive below expects a signed integer-to-float
// conversion from <2 x i32> to <2 x double>.
__m128d test_mm_cvtpi32_pd(__m64 a) {
190199
// CHECK-LABEL: test_mm_cvtpi32_pd
191200
// CHECK: sitofp <2 x i32> {{%.*}} to <2 x double>
192201
return _mm_cvtpi32_pd(a);
193202
}
203+
TEST_CONSTEXPR(match_m128d(_mm_cvtpi32_pd((__m64)(__v2si){-10,+17}), -10.0, +17.0));
194204

195205
// Codegen test for _mm_cvtpi32_ps: forwards both arguments to the intrinsic.
// The FileCheck directive below expects a signed integer-to-float conversion
// from <4 x i32> to <4 x float>.
__m128 test_mm_cvtpi32_ps(__m128 a, __m64 b) {
196206
// CHECK-LABEL: test_mm_cvtpi32_ps
197207
// CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
198208
return _mm_cvtpi32_ps(a, b);
199209
}
210+
TEST_CONSTEXPR(match_m128(_mm_cvtpi32_ps((__m128){+1.0f, -2.0f, +3.0f, +5.0f}, (__m64)(__v2si){+100,-200}), +100.0f, -200.0f, +3.0f, +5.0f));
200211

201212
// Codegen test for _mm_cvtpi32x2_ps: forwards both __m64 arguments to the
// intrinsic. The FileCheck directive below expects a signed integer-to-float
// conversion from <4 x i32> to <4 x float>.
__m128 test_mm_cvtpi32x2_ps(__m64 a, __m64 b) {
202213
// CHECK-LABEL: test_mm_cvtpi32x2_ps
203214
// CHECK: sitofp <4 x i32> {{%.*}} to <4 x float>
204215
return _mm_cvtpi32x2_ps(a, b);
205216
}
217+
TEST_CONSTEXPR(match_m128(_mm_cvtpi32x2_ps((__m64)(__v2si){-8,+7}, (__m64)(__v2si){+100,-200}), -8.0f, +7.0f, +100.0f, -200.0f));
206218

207219
__m64 test_mm_cvtps_pi16(__m128 a) {
208220
// CHECK-LABEL: test_mm_cvtps_pi16
@@ -217,6 +229,20 @@ __m64 test_mm_cvtps_pi32(__m128 a) {
217229
return _mm_cvtps_pi32(a);
218230
}
219231

232+
// Codegen test for _mm_cvtpu8_ps: forwards its __m64 argument to the
// intrinsic. The FileCheck directive below expects an unsigned
// integer-to-float conversion from <4 x i8> to <4 x float>.
__m128 test_mm_cvtpu8_ps(__m64 a) {
233+
// CHECK-LABEL: test_mm_cvtpu8_ps
234+
// CHECK: uitofp <4 x i8> {{%.*}} to <4 x float>
235+
return _mm_cvtpu8_ps(a);
236+
}
237+
// Constant-evaluation check: the input holds eight bytes, but the expected
// result lists only the first four (8, 7, 6, 5) as floats.
TEST_CONSTEXPR(match_m128(_mm_cvtpu8_ps((__m64)(__v8qi){8, 7, 6, 5, 4, 3, 2, 1}), 8.0f, 7.0f, 6.0f, 5.0f));
238+
239+
// Codegen test for _mm_cvtpu16_ps: forwards its __m64 argument to the
// intrinsic. The FileCheck directive below expects an unsigned
// integer-to-float conversion from <4 x i16> to <4 x float>.
__m128 test_mm_cvtpu16_ps(__m64 a) {
240+
// CHECK-LABEL: test_mm_cvtpu16_ps
241+
// CHECK: uitofp <4 x i16> {{%.*}} to <4 x float>
242+
return _mm_cvtpu16_ps(a);
243+
}
244+
// Constant-evaluation check: the negative 16-bit inputs are expected to be
// read as unsigned, so -3 becomes 65533 (65536 - 3) and -8 becomes 65528
// (65536 - 8); the positive inputs are unchanged.
TEST_CONSTEXPR(match_m128(_mm_cvtpu16_ps((__m64)(__v4hi){-3, +9, -8, +256}), 65533.0f, 9.0f, 65528.0f, 256.0f));
245+
220246
__m64 test_mm_cvtsi32_si64(int a) {
221247
// CHECK-LABEL: test_mm_cvtsi32_si64
222248
// CHECK: insertelement <2 x i32>

0 commit comments

Comments
 (0)