Skip to content

Commit a81c406

Browse files
author
whyuuwang
committed
Address issue #155395
VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr #155395
1 parent 3219fb0 commit a81c406

File tree

8 files changed

+250
-77
lines changed

8 files changed

+250
-77
lines changed

clang/lib/Headers/avx2intrin.h

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -854,10 +854,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
854854
/// \param __b
855855
/// A 256-bit vector of [16 x i16] containing one of the source operands.
856856
/// \returns A 256-bit vector of [16 x i16] containing the sums.
857-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
858-
_mm256_hadd_epi16(__m256i __a, __m256i __b)
859-
{
860-
return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
857+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
858+
_mm256_hadd_epi16(__m256i __a, __m256i __b) {
859+
return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
861860
}
862861

863862
/// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit
@@ -886,7 +885,7 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b)
886885
/// \param __b
887886
/// A 256-bit vector of [8 x i32] containing one of the source operands.
888887
/// \returns A 256-bit vector of [8 x i32] containing the sums.
889-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
888+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
890889
_mm256_hadd_epi32(__m256i __a, __m256i __b)
891890
{
892891
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
@@ -921,10 +920,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b)
921920
/// \param __b
922921
/// A 256-bit vector of [16 x i16] containing one of the source operands.
923922
/// \returns A 256-bit vector of [16 x i16] containing the sums.
924-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
925-
_mm256_hadds_epi16(__m256i __a, __m256i __b)
926-
{
927-
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
923+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
924+
_mm256_hadds_epi16(__m256i __a, __m256i __b) {
925+
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
928926
}
929927

930928
/// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit
@@ -957,10 +955,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b)
957955
/// \param __b
958956
/// A 256-bit vector of [16 x i16] containing one of the source operands.
959957
/// \returns A 256-bit vector of [16 x i16] containing the differences.
960-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
961-
_mm256_hsub_epi16(__m256i __a, __m256i __b)
962-
{
963-
return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
958+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
959+
_mm256_hsub_epi16(__m256i __a, __m256i __b) {
960+
return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
964961
}
965962

966963
/// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit
@@ -989,7 +986,7 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b)
989986
/// \param __b
990987
/// A 256-bit vector of [8 x i32] containing one of the source operands.
991988
/// \returns A 256-bit vector of [8 x i32] containing the differences.
992-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
989+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
993990
_mm256_hsub_epi32(__m256i __a, __m256i __b)
994991
{
995992
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
@@ -1025,7 +1022,7 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b)
10251022
/// \param __b
10261023
/// A 256-bit vector of [16 x i16] containing one of the source operands.
10271024
/// \returns A 256-bit vector of [16 x i16] containing the differences.
1028-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1025+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
10291026
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
10301027
{
10311028
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);

clang/lib/Headers/avxintrin.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ _mm256_xor_ps(__m256 __a, __m256 __b)
703703
/// elements of a vector of [4 x double].
704704
/// \returns A 256-bit vector of [4 x double] containing the horizontal sums of
705705
/// both operands.
706-
static __inline __m256d __DEFAULT_FN_ATTRS
706+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
707707
_mm256_hadd_pd(__m256d __a, __m256d __b)
708708
{
709709
return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
@@ -726,9 +726,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b)
726726
/// index 2, 3, 6, 7 of a vector of [8 x float].
727727
/// \returns A 256-bit vector of [8 x float] containing the horizontal sums of
728728
/// both operands.
729-
static __inline __m256 __DEFAULT_FN_ATTRS
730-
_mm256_hadd_ps(__m256 __a, __m256 __b)
731-
{
729+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
730+
_mm256_hadd_ps(__m256 __a, __m256 __b) {
732731
return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
733732
}
734733

@@ -749,7 +748,7 @@ _mm256_hadd_ps(__m256 __a, __m256 __b)
749748
/// odd-indexed elements of a vector of [4 x double].
750749
/// \returns A 256-bit vector of [4 x double] containing the horizontal
751750
/// differences of both operands.
752-
static __inline __m256d __DEFAULT_FN_ATTRS
751+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
753752
_mm256_hsub_pd(__m256d __a, __m256d __b)
754753
{
755754
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
@@ -772,7 +771,7 @@ _mm256_hsub_pd(__m256d __a, __m256d __b)
772771
/// elements with index 2, 3, 6, 7 of a vector of [8 x float].
773772
/// \returns A 256-bit vector of [8 x float] containing the horizontal
774773
/// differences of both operands.
775-
static __inline __m256 __DEFAULT_FN_ATTRS
774+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
776775
_mm256_hsub_ps(__m256 __a, __m256 __b)
777776
{
778777
return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);

clang/lib/Headers/pmmintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b)
8989
/// destination.
9090
/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
9191
/// both operands.
92-
static __inline__ __m128 __DEFAULT_FN_ATTRS
93-
_mm_hadd_ps(__m128 __a, __m128 __b)
94-
{
92+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
93+
_mm_hadd_ps(__m128 __a, __m128 __b) {
9594
return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
9695
}
9796

@@ -174,9 +173,8 @@ _mm_moveldup_ps(__m128 __a)
174173
/// A 128-bit vector of [2 x double] containing the right source operand.
175174
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
176175
/// and differences of both operands.
177-
static __inline__ __m128d __DEFAULT_FN_ATTRS
178-
_mm_addsub_pd(__m128d __a, __m128d __b)
179-
{
176+
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
177+
_mm_addsub_pd(__m128d __a, __m128d __b) {
180178
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
181179
}
182180

@@ -197,9 +195,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b)
197195
/// destination.
198196
/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
199197
/// both operands.
200-
static __inline__ __m128d __DEFAULT_FN_ATTRS
201-
_mm_hadd_pd(__m128d __a, __m128d __b)
202-
{
198+
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
199+
_mm_hadd_pd(__m128d __a, __m128d __b) {
203200
return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
204201
}
205202

@@ -220,9 +217,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b)
220217
/// the destination.
221218
/// \returns A 128-bit vector of [2 x double] containing the horizontal
222219
/// differences of both operands.
223-
static __inline__ __m128d __DEFAULT_FN_ATTRS
224-
_mm_hsub_pd(__m128d __a, __m128d __b)
225-
{
220+
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
221+
_mm_hsub_pd(__m128d __a, __m128d __b) {
226222
return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
227223
}
228224

clang/lib/Headers/tmmintrin.h

Lines changed: 36 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,10 @@ _mm_abs_epi32(__m128i __a) {
204204
/// destination.
205205
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
206206
/// both operands.
207-
static __inline__ __m128i __DEFAULT_FN_ATTRS
208-
_mm_hadd_epi16(__m128i __a, __m128i __b)
209-
{
210-
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
207+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
208+
_mm_hadd_epi16(__m128i __a, __m128i __b) {
209+
return (__m128i)__builtin_ia32_phaddw128(
210+
(__v8hi)__a, (__v8hi)__b);
211211
}
212212

213213
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -227,10 +227,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b)
227227
/// destination.
228228
/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
229229
/// both operands.
230-
static __inline__ __m128i __DEFAULT_FN_ATTRS
231-
_mm_hadd_epi32(__m128i __a, __m128i __b)
232-
{
233-
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
230+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
231+
_mm_hadd_epi32(__m128i __a, __m128i __b) {
232+
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
234233
}
235234

236235
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -250,11 +249,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b)
250249
/// destination.
251250
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
252251
/// operands.
253-
static __inline__ __m64 __DEFAULT_FN_ATTRS
254-
_mm_hadd_pi16(__m64 __a, __m64 __b)
255-
{
256-
return __trunc64(__builtin_ia32_phaddw128(
257-
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
252+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
253+
_mm_hadd_pi16(__m64 __a, __m64 __b) {
254+
return __trunc64(__builtin_ia32_phaddw128(
255+
(__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
258256
}
259257

260258
/// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -274,7 +272,7 @@ _mm_hadd_pi16(__m64 __a, __m64 __b)
274272
/// destination.
275273
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
276274
/// operands.
277-
static __inline__ __m64 __DEFAULT_FN_ATTRS
275+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
278276
_mm_hadd_pi32(__m64 __a, __m64 __b)
279277
{
280278
return __trunc64(__builtin_ia32_phaddd128(
@@ -301,10 +299,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
301299
/// destination.
302300
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
303301
/// sums of both operands.
304-
static __inline__ __m128i __DEFAULT_FN_ATTRS
305-
_mm_hadds_epi16(__m128i __a, __m128i __b)
306-
{
307-
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
302+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
303+
_mm_hadds_epi16(__m128i __a, __m128i __b) {
304+
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
308305
}
309306

310307
/// Horizontally adds, with saturation, the adjacent pairs of values contained
@@ -327,7 +324,7 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
327324
/// destination.
328325
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
329326
/// sums of both operands.
330-
static __inline__ __m64 __DEFAULT_FN_ATTRS
327+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
331328
_mm_hadds_pi16(__m64 __a, __m64 __b)
332329
{
333330
return __trunc64(__builtin_ia32_phaddsw128(
@@ -351,10 +348,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b)
351348
/// the destination.
352349
/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
353350
/// of both operands.
354-
static __inline__ __m128i __DEFAULT_FN_ATTRS
355-
_mm_hsub_epi16(__m128i __a, __m128i __b)
356-
{
357-
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
351+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
352+
_mm_hsub_epi16(__m128i __a, __m128i __b) {
353+
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
358354
}
359355

360356
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -374,10 +370,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b)
374370
/// the destination.
375371
/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
376372
/// of both operands.
377-
static __inline__ __m128i __DEFAULT_FN_ATTRS
378-
_mm_hsub_epi32(__m128i __a, __m128i __b)
379-
{
380-
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
373+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
374+
_mm_hsub_epi32(__m128i __a, __m128i __b) {
375+
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
381376
}
382377

383378
/// Horizontally subtracts the adjacent pairs of values contained in 2
@@ -397,7 +392,7 @@ _mm_hsub_epi32(__m128i __a, __m128i __b)
397392
/// the destination.
398393
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
399394
/// of both operands.
400-
static __inline__ __m64 __DEFAULT_FN_ATTRS
395+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
401396
_mm_hsub_pi16(__m64 __a, __m64 __b)
402397
{
403398
return __trunc64(__builtin_ia32_phsubw128(
@@ -421,7 +416,7 @@ _mm_hsub_pi16(__m64 __a, __m64 __b)
421416
/// the destination.
422417
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
423418
/// of both operands.
424-
static __inline__ __m64 __DEFAULT_FN_ATTRS
419+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
425420
_mm_hsub_pi32(__m64 __a, __m64 __b)
426421
{
427422
return __trunc64(__builtin_ia32_phsubd128(
@@ -448,10 +443,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
448443
/// the destination.
449444
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
450445
/// differences of both operands.
451-
static __inline__ __m128i __DEFAULT_FN_ATTRS
452-
_mm_hsubs_epi16(__m128i __a, __m128i __b)
453-
{
454-
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
446+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
447+
_mm_hsubs_epi16(__m128i __a, __m128i __b) {
448+
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
455449
}
456450

457451
/// Horizontally subtracts, with saturation, the adjacent pairs of values
@@ -474,7 +468,7 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
474468
/// the destination.
475469
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
476470
/// differences of both operands.
477-
static __inline__ __m64 __DEFAULT_FN_ATTRS
471+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
478472
_mm_hsubs_pi16(__m64 __a, __m64 __b)
479473
{
480474
return __trunc64(__builtin_ia32_phsubsw128(
@@ -509,10 +503,9 @@ _mm_hsubs_pi16(__m64 __a, __m64 __b)
509503
/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
510504
/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
511505
/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
512-
static __inline__ __m128i __DEFAULT_FN_ATTRS
513-
_mm_maddubs_epi16(__m128i __a, __m128i __b)
514-
{
515-
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
506+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
507+
_mm_maddubs_epi16(__m128i __a, __m128i __b) {
508+
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
516509
}
517510

518511
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@@ -539,11 +532,10 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b)
539532
/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
540533
/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
541534
/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
542-
static __inline__ __m64 __DEFAULT_FN_ATTRS
543-
_mm_maddubs_pi16(__m64 __a, __m64 __b)
544-
{
545-
return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
546-
(__v16qi)__anyext128(__b)));
535+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
536+
_mm_maddubs_pi16(__m64 __a, __m64 __b) {
537+
return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
538+
(__v16qi)__anyext128(__b)));
547539
}
548540

549541
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@@ -560,7 +552,7 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b)
560552
/// A 128-bit vector of [8 x i16] containing one of the source operands.
561553
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
562554
/// products of both operands.
563-
static __inline__ __m128i __DEFAULT_FN_ATTRS
555+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
564556
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
565557
{
566558
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,24 +1083,53 @@ __m256d test_mm256_hadd_pd(__m256d A, __m256d B) {
10831083
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
10841084
return _mm256_hadd_pd(A, B);
10851085
}
1086+
// Constant-evaluation test for _mm256_hadd_pd (was misnamed "..._epi32_...").
//
// _mm256_set_pd takes its arguments from the highest element down, so in
// element order a = {4.0, 3.0, 2.0, 1.0} and b = {8.0, 7.0, 6.0, 5.0}.
// The 256-bit horizontal add works per 128-bit lane and interleaves the
// operands: result = {a0+a1, b0+b1, a2+a3, b2+b3}.
//
// NOTE(review): assumes match_m256d compares elements in index order 0..3 and
// that TEST_CONSTEXPR is the static-assert wrapper from the test helpers
// header -- confirm against that header.
//
// A constexpr function is not valid C, and this file carries a .c extension,
// so the whole test is restricted to C++ translation units.
#if defined(__cplusplus)
constexpr bool test_mm256_hadd_pd_constexpr() {
  constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
  constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
  constexpr __m256d result = _mm256_hadd_pd(a, b);
  return match_m256d(result, 4.0+3.0, 8.0+7.0, 2.0+1.0, 6.0+5.0);
}
TEST_CONSTEXPR(test_mm256_hadd_pd_constexpr())
#endif
10861093

10871094
__m256 test_mm256_hadd_ps(__m256 A, __m256 B) {
10881095
// CHECK-LABEL: test_mm256_hadd_ps
10891096
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
10901097
return _mm256_hadd_ps(A, B);
10911098
}
1099+
// Constant-evaluation test for _mm256_hadd_ps.
//
// _mm256_set_ps takes its arguments from the highest element down, so in
// element order a = {8, 7, 6, 5, 4, 3, 2, 1} and
// b = {16, 15, 14, 13, 12, 11, 10, 9}.
// The 256-bit horizontal add works per 128-bit lane; within each lane the two
// sums from a come first, followed by the two sums from b:
//   result = {a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7}.
//
// NOTE(review): assumes match_m256 compares elements in index order 0..7 --
// confirm against the test helpers header.
//
// A constexpr function is not valid C, and this file carries a .c extension,
// so the whole test is restricted to C++ translation units.
#if defined(__cplusplus)
constexpr bool test_mm256_hadd_ps_constexpr() {
  constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
  constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
  constexpr __m256 result = _mm256_hadd_ps(a, b);
  return match_m256(result, 8.0f+7.0f, 6.0f+5.0f, 16.0f+15.0f, 14.0f+13.0f,
                    4.0f+3.0f, 2.0f+1.0f, 12.0f+11.0f, 10.0f+9.0f);
}
TEST_CONSTEXPR(test_mm256_hadd_ps_constexpr())
#endif
10921107

10931108
__m256d test_mm256_hsub_pd(__m256d A, __m256d B) {
10941109
// CHECK-LABEL: test_mm256_hsub_pd
10951110
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
10961111
return _mm256_hsub_pd(A, B);
10971112
}
1113+
// Constant-evaluation test for _mm256_hsub_pd.
//
// _mm256_set_pd takes its arguments from the highest element down, so in
// element order a = {4.0, 3.0, 2.0, 1.0} and b = {8.0, 7.0, 6.0, 5.0}.
// The 256-bit horizontal subtract computes, per 128-bit lane, the even element
// minus the odd element and interleaves the operands:
//   result = {a0-a1, b0-b1, a2-a3, b2-b3}.
// With these inputs every difference is +1.0 (not -1.0).
//
// NOTE(review): assumes match_m256d compares elements in index order 0..3 --
// confirm against the test helpers header.
//
// A constexpr function is not valid C, and this file carries a .c extension,
// so the whole test is restricted to C++ translation units.
#if defined(__cplusplus)
constexpr bool test_mm256_hsub_pd_constexpr() {
  constexpr __m256d a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
  constexpr __m256d b = _mm256_set_pd(5.0, 6.0, 7.0, 8.0);
  constexpr __m256d result = _mm256_hsub_pd(a, b);
  return match_m256d(result, 4.0-3.0, 8.0-7.0, 2.0-1.0, 6.0-5.0);
}
TEST_CONSTEXPR(test_mm256_hsub_pd_constexpr())
#endif
10981120

10991121
__m256 test_mm256_hsub_ps(__m256 A, __m256 B) {
11001122
// CHECK-LABEL: test_mm256_hsub_ps
11011123
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
11021124
return _mm256_hsub_ps(A, B);
11031125
}
1126+
// Constant-evaluation test for _mm256_hsub_ps.
//
// _mm256_set_ps takes its arguments from the highest element down, so in
// element order a = {8, 7, 6, 5, 4, 3, 2, 1} and
// b = {16, 15, 14, 13, 12, 11, 10, 9}.
// The 256-bit horizontal subtract computes, per 128-bit lane, even element
// minus odd element; the two differences from a come first, then the two
// from b:
//   result = {a0-a1, a2-a3, b0-b1, b2-b3, a4-a5, a6-a7, b4-b5, b6-b7}.
// With these inputs every difference is +1.0f (not -1.0f).
//
// NOTE(review): assumes match_m256 compares elements in index order 0..7 --
// confirm against the test helpers header.
//
// A constexpr function is not valid C, and this file carries a .c extension,
// so the whole test is restricted to C++ translation units.
#if defined(__cplusplus)
constexpr bool test_mm256_hsub_ps_constexpr() {
  constexpr __m256 a = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
  constexpr __m256 b = _mm256_set_ps(9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f);
  constexpr __m256 result = _mm256_hsub_ps(a, b);
  return match_m256(result, 8.0f-7.0f, 6.0f-5.0f, 16.0f-15.0f, 14.0f-13.0f,
                    4.0f-3.0f, 2.0f-1.0f, 12.0f-11.0f, 10.0f-9.0f);
}
// The function was previously defined but never evaluated; invoke it so the
// test actually runs, matching the sibling constexpr tests.
TEST_CONSTEXPR(test_mm256_hsub_ps_constexpr())
#endif
11041133

11051134
__m256i test_mm256_insert_epi8(__m256i x, char b) {
11061135
// CHECK-LABEL: test_mm256_insert_epi8

0 commit comments

Comments
 (0)