Skip to content

Commit b176ba7

Browse files
authored
[Headers][X86] Allow MMX/SSE integer min/max intrinsics to be used in constexpr (#156678)
Update the MMX/SSE integer min/max intrinsics to be constexpr-compatible, so that they can be used in constant expressions. This is part of #153153.
1 parent c93e2de commit b176ba7

File tree

7 files changed

+61
-30
lines changed

7 files changed

+61
-30
lines changed

clang/lib/Headers/emmintrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2316,8 +2316,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a,
23162316
/// A 128-bit signed [8 x i16] vector.
23172317
/// \returns A 128-bit signed [8 x i16] vector containing the greater value of
23182318
/// each comparison.
2319-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
2320-
__m128i __b) {
2319+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2320+
_mm_max_epi16(__m128i __a, __m128i __b) {
23212321
return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
23222322
}
23232323

@@ -2335,8 +2335,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a,
23352335
/// A 128-bit unsigned [16 x i8] vector.
23362336
/// \returns A 128-bit unsigned [16 x i8] vector containing the greater value of
23372337
/// each comparison.
2338-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
2339-
__m128i __b) {
2338+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2339+
_mm_max_epu8(__m128i __a, __m128i __b) {
23402340
return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
23412341
}
23422342

@@ -2354,8 +2354,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a,
23542354
/// A 128-bit signed [8 x i16] vector.
23552355
/// \returns A 128-bit signed [8 x i16] vector containing the smaller value of
23562356
/// each comparison.
2357-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
2358-
__m128i __b) {
2357+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2358+
_mm_min_epi16(__m128i __a, __m128i __b) {
23592359
return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
23602360
}
23612361

@@ -2373,8 +2373,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a,
23732373
/// A 128-bit unsigned [16 x i8] vector.
23742374
/// \returns A 128-bit unsigned [16 x i8] vector containing the smaller value of
23752375
/// each comparison.
2376-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a,
2377-
__m128i __b) {
2376+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2377+
_mm_min_epu8(__m128i __a, __m128i __b) {
23782378
return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
23792379
}
23802380

clang/lib/Headers/smmintrin.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -675,8 +675,8 @@ _mm_stream_load_si128(const void *__V) {
675675
/// \param __V2
676676
/// A 128-bit vector of [16 x i8].
677677
/// \returns A 128-bit vector of [16 x i8] containing the lesser values.
678-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
679-
__m128i __V2) {
678+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
679+
_mm_min_epi8(__m128i __V1, __m128i __V2) {
680680
return (__m128i)__builtin_elementwise_min((__v16qs)__V1, (__v16qs)__V2);
681681
}
682682

@@ -693,8 +693,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1,
693693
/// \param __V2
694694
/// A 128-bit vector of [16 x i8].
695695
/// \returns A 128-bit vector of [16 x i8] containing the greater values.
696-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
697-
__m128i __V2) {
696+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
697+
_mm_max_epi8(__m128i __V1, __m128i __V2) {
698698
return (__m128i)__builtin_elementwise_max((__v16qs)__V1, (__v16qs)__V2);
699699
}
700700

@@ -711,8 +711,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1,
711711
/// \param __V2
712712
/// A 128-bit vector of [8 x u16].
713713
/// \returns A 128-bit vector of [8 x u16] containing the lesser values.
714-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
715-
__m128i __V2) {
714+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
715+
_mm_min_epu16(__m128i __V1, __m128i __V2) {
716716
return (__m128i)__builtin_elementwise_min((__v8hu)__V1, (__v8hu)__V2);
717717
}
718718

@@ -729,8 +729,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1,
729729
/// \param __V2
730730
/// A 128-bit vector of [8 x u16].
731731
/// \returns A 128-bit vector of [8 x u16] containing the greater values.
732-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
733-
__m128i __V2) {
732+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
733+
_mm_max_epu16(__m128i __V1, __m128i __V2) {
734734
return (__m128i)__builtin_elementwise_max((__v8hu)__V1, (__v8hu)__V2);
735735
}
736736

@@ -747,8 +747,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1,
747747
/// \param __V2
748748
/// A 128-bit vector of [4 x i32].
749749
/// \returns A 128-bit vector of [4 x i32] containing the lesser values.
750-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
751-
__m128i __V2) {
750+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
751+
_mm_min_epi32(__m128i __V1, __m128i __V2) {
752752
return (__m128i)__builtin_elementwise_min((__v4si)__V1, (__v4si)__V2);
753753
}
754754

@@ -765,8 +765,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1,
765765
/// \param __V2
766766
/// A 128-bit vector of [4 x i32].
767767
/// \returns A 128-bit vector of [4 x i32] containing the greater values.
768-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
769-
__m128i __V2) {
768+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
769+
_mm_max_epi32(__m128i __V1, __m128i __V2) {
770770
return (__m128i)__builtin_elementwise_max((__v4si)__V1, (__v4si)__V2);
771771
}
772772

@@ -783,8 +783,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1,
783783
/// \param __V2
784784
/// A 128-bit vector of [4 x u32].
785785
/// \returns A 128-bit vector of [4 x u32] containing the lesser values.
786-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
787-
__m128i __V2) {
786+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
787+
_mm_min_epu32(__m128i __V1, __m128i __V2) {
788788
return (__m128i)__builtin_elementwise_min((__v4su)__V1, (__v4su)__V2);
789789
}
790790

@@ -801,8 +801,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1,
801801
/// \param __V2
802802
/// A 128-bit vector of [4 x u32].
803803
/// \returns A 128-bit vector of [4 x u32] containing the greater values.
804-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1,
805-
__m128i __V2) {
804+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
805+
_mm_max_epu32(__m128i __V1, __m128i __V2) {
806806
return (__m128i)__builtin_elementwise_max((__v4su)__V1, (__v4su)__V2);
807807
}
808808

clang/lib/Headers/xmmintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2353,9 +2353,8 @@ void _mm_sfence(void);
23532353
/// \param __b
23542354
/// A 64-bit integer vector containing one of the source operands.
23552355
/// \returns A 64-bit integer vector containing the comparison results.
2356-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
2357-
_mm_max_pi16(__m64 __a, __m64 __b)
2358-
{
2356+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
2357+
_mm_max_pi16(__m64 __a, __m64 __b) {
23592358
return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b);
23602359
}
23612360

@@ -2391,9 +2390,8 @@ _mm_max_pu8(__m64 __a, __m64 __b)
23912390
/// \param __b
23922391
/// A 64-bit integer vector containing one of the source operands.
23932392
/// \returns A 64-bit integer vector containing the comparison results.
2394-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
2395-
_mm_min_pi16(__m64 __a, __m64 __b)
2396-
{
2393+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
2394+
_mm_min_pi16(__m64 __a, __m64 __b) {
23972395
return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b);
23982396
}
23992397

clang/test/CodeGen/X86/builtin_test_helpers.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,11 @@ constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f,
230230
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
231231
}
232232

233+
constexpr bool match_v16su(__m512i _v, unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, unsigned int m, unsigned int n, unsigned int o, unsigned int p) {
234+
__v16su v = (__v16su)_v;
235+
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
236+
}
237+
233238
constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, short __e03, short __e04, short __e05, short __e06, short __e07,
234239
short __e08, short __e09, short __e10, short __e11, short __e12, short __e13, short __e14, short __e15,
235240
short __e16, short __e17, short __e18, short __e19, short __e20, short __e21, short __e22, short __e23,

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ __m64 test_mm_max_pi16(__m64 a, __m64 b) {
370370
return _mm_max_pi16(a, b);
371371
}
372372

373+
TEST_CONSTEXPR(match_v4hi(_mm_max_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, 2, -3, 4}), 1, 2, 3, 4));
374+
373375
__m64 test_mm_max_pu8(__m64 a, __m64 b) {
374376
// CHECK-LABEL: test_mm_max_pu8
375377
// CHECK: call <8 x i8> @llvm.umax.v8i8(
@@ -382,6 +384,8 @@ __m64 test_mm_min_pi16(__m64 a, __m64 b) {
382384
return _mm_min_pi16(a, b);
383385
}
384386

387+
TEST_CONSTEXPR(match_v4hi(_mm_min_pi16((__m64)(__v4hi){+1, -2, +3, -4}, (__m64)(__v4hi){-1, 2, -3, 4}), -1, -2, -3, -4));
388+
385389
__m64 test_mm_min_pu8(__m64 a, __m64 b) {
386390
// CHECK-LABEL: test_mm_min_pu8
387391
// CHECK: call <8 x i8> @llvm.umin.v8i8(

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,12 +855,16 @@ __m128i test_mm_max_epi16(__m128i A, __m128i B) {
855855
return _mm_max_epi16(A, B);
856856
}
857857

858+
TEST_CONSTEXPR(match_v8hi(_mm_max_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 5, 8, 12, 20, 32));
859+
858860
__m128i test_mm_max_epu8(__m128i A, __m128i B) {
859861
// CHECK-LABEL: test_mm_max_epu8
860862
// CHECK: call <16 x i8> @llvm.umax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
861863
return _mm_max_epu8(A, B);
862864
}
863865

866+
TEST_CONSTEXPR(match_v16qu(_mm_max_epu8((__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 9, 10, 11, 12, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16));
867+
864868
__m128d test_mm_max_pd(__m128d A, __m128d B) {
865869
// CHECK-LABEL: test_mm_max_pd
866870
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
@@ -885,12 +889,16 @@ __m128i test_mm_min_epi16(__m128i A, __m128i B) {
885889
return _mm_min_epi16(A, B);
886890
}
887891

892+
TEST_CONSTEXPR(match_v8hi(_mm_min_epi16((__m128i)(__v8hi){1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v8hi){1, 2, 3, 5, 8, 12, 20, 32}), 1, 2, 3, 4, 5, 6, 7, 8));
893+
888894
__m128i test_mm_min_epu8(__m128i A, __m128i B) {
889895
// CHECK-LABEL: test_mm_min_epu8
890896
// CHECK: call <16 x i8> @llvm.umin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
891897
return _mm_min_epu8(A, B);
892898
}
893899

900+
TEST_CONSTEXPR(match_v16qu(_mm_min_epu8((__m128i)(__v16qu){9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8}, (__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
901+
894902
__m128d test_mm_min_pd(__m128d A, __m128d B) {
895903
// CHECK-LABEL: test_mm_min_pd
896904
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})

clang/test/CodeGen/X86/sse41-builtins.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,48 +291,64 @@ __m128i test_mm_max_epi8(__m128i x, __m128i y) {
291291
return _mm_max_epi8(x, y);
292292
}
293293

294+
TEST_CONSTEXPR(match_v16qi(_mm_max_epi8((__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), +1, +2, +3, +4, +5, +6, +7, +8, +9, +10, +11, +12, +13, +14, +15, +16));
295+
294296
__m128i test_mm_max_epi32(__m128i x, __m128i y) {
295297
// CHECK-LABEL: test_mm_max_epi32
296298
// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
297299
return _mm_max_epi32(x, y);
298300
}
299301

302+
TEST_CONSTEXPR(match_v4si(_mm_max_epi32((__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), +1, +2, +3, +4 ));
303+
300304
__m128i test_mm_max_epu16(__m128i x, __m128i y) {
301305
// CHECK-LABEL: test_mm_max_epu16
302306
// CHECK: call <8 x i16> @llvm.umax.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
303307
return _mm_max_epu16(x, y);
304308
}
305309

310+
TEST_CONSTEXPR(match_v8hu(_mm_max_epu16((__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 3, 4, 5, 7, 9, 11, 13, 15));
311+
306312
__m128i test_mm_max_epu32(__m128i x, __m128i y) {
307313
// CHECK-LABEL: test_mm_max_epu32
308314
// CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
309315
return _mm_max_epu32(x, y);
310316
}
311317

318+
TEST_CONSTEXPR(match_v4su(_mm_max_epu32((__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 3, 4, 5, 7));
319+
312320
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
313321
// CHECK-LABEL: test_mm_min_epi8
314322
// CHECK: call <16 x i8> @llvm.smin.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
315323
return _mm_min_epi8(x, y);
316324
}
317325

326+
TEST_CONSTEXPR(match_v16qi(_mm_min_epi8((__m128i)(__v16qs){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, (__m128i)(__v16qs){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}), -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16));
327+
318328
__m128i test_mm_min_epi32(__m128i x, __m128i y) {
319329
// CHECK-LABEL: test_mm_min_epi32
320330
// CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
321331
return _mm_min_epi32(x, y);
322332
}
323333

334+
TEST_CONSTEXPR(match_v4si(_mm_min_epi32((__m128i)(__v4si){-1, +2, -3, +4}, (__m128i)(__v4si){+1, -2, +3, -4}), -1, -2, -3, -4 ));
335+
324336
__m128i test_mm_min_epu16(__m128i x, __m128i y) {
325337
// CHECK-LABEL: test_mm_min_epu16
326338
// CHECK: call <8 x i16> @llvm.umin.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
327339
return _mm_min_epu16(x, y);
328340
}
329341

342+
TEST_CONSTEXPR(match_v8hu(_mm_min_epu16((__m128i)(__v8hu){1, 3, 5, 7, 9, 11, 13, 15}, (__m128i)(__v8hu){3, 4, 5, 6, 7, 8, 9, 10}), 1, 3, 5, 6, 7, 8, 9, 10));
343+
330344
__m128i test_mm_min_epu32(__m128i x, __m128i y) {
331345
// CHECK-LABEL: test_mm_min_epu32
332346
// CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
333347
return _mm_min_epu32(x, y);
334348
}
335349

350+
TEST_CONSTEXPR(match_v4su(_mm_min_epu32((__m128i)(__v4su){1, 3, 5, 7}, (__m128i)(__v4su){3, 4, 5, 6}), 1, 3, 5, 6));
351+
336352
__m128i test_mm_minpos_epu16(__m128i x) {
337353
// CHECK-LABEL: test_mm_minpos_epu16
338354
// CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})

0 commit comments

Comments
 (0)