Skip to content

Commit e1d6753

Browse files
yichi170 and RKSimon authored
[Headers][X86] Update AVX/AVX512 float/double add/sub/mul/div/unpck intrinsics to be used in constexpr (#152435)
Fixes #152313

Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 246990d commit e1d6753

File tree

4 files changed: 72 additions and 75 deletions

clang/lib/Headers/avx512fintrin.h

Lines changed: 26 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -835,45 +835,38 @@ _mm512_xor_si512(__m512i __a, __m512i __b)
835835

836836
/* Arithmetic */
837837

838-
static __inline __m512d __DEFAULT_FN_ATTRS512
839-
_mm512_add_pd(__m512d __a, __m512d __b)
840-
{
838+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
839+
_mm512_add_pd(__m512d __a, __m512d __b) {
841840
return (__m512d)((__v8df)__a + (__v8df)__b);
842841
}
843842

844-
static __inline __m512 __DEFAULT_FN_ATTRS512
845-
_mm512_add_ps(__m512 __a, __m512 __b)
846-
{
843+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
844+
_mm512_add_ps(__m512 __a, __m512 __b) {
847845
return (__m512)((__v16sf)__a + (__v16sf)__b);
848846
}
849847

850-
static __inline __m512d __DEFAULT_FN_ATTRS512
851-
_mm512_mul_pd(__m512d __a, __m512d __b)
852-
{
848+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
849+
_mm512_mul_pd(__m512d __a, __m512d __b) {
853850
return (__m512d)((__v8df)__a * (__v8df)__b);
854851
}
855852

856-
static __inline __m512 __DEFAULT_FN_ATTRS512
857-
_mm512_mul_ps(__m512 __a, __m512 __b)
858-
{
853+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
854+
_mm512_mul_ps(__m512 __a, __m512 __b) {
859855
return (__m512)((__v16sf)__a * (__v16sf)__b);
860856
}
861857

862-
static __inline __m512d __DEFAULT_FN_ATTRS512
863-
_mm512_sub_pd(__m512d __a, __m512d __b)
864-
{
858+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
859+
_mm512_sub_pd(__m512d __a, __m512d __b) {
865860
return (__m512d)((__v8df)__a - (__v8df)__b);
866861
}
867862

868-
static __inline __m512 __DEFAULT_FN_ATTRS512
869-
_mm512_sub_ps(__m512 __a, __m512 __b)
870-
{
863+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
864+
_mm512_sub_ps(__m512 __a, __m512 __b) {
871865
return (__m512)((__v16sf)__a - (__v16sf)__b);
872866
}
873867

874-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
875-
_mm512_add_epi64 (__m512i __A, __m512i __B)
876-
{
868+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
869+
_mm512_add_epi64(__m512i __A, __m512i __B) {
877870
return (__m512i) ((__v8du) __A + (__v8du) __B);
878871
}
879872

@@ -2315,9 +2308,8 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
23152308
(__v2df)_mm_setzero_pd(), \
23162309
(__mmask8)(U), (int)(R)))
23172310

2318-
static __inline __m512d __DEFAULT_FN_ATTRS512
2319-
_mm512_div_pd(__m512d __a, __m512d __b)
2320-
{
2311+
static __inline __m512d
2312+
__DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_div_pd(__m512d __a, __m512d __b) {
23212313
return (__m512d)((__v8df)__a/(__v8df)__b);
23222314
}
23232315

@@ -2335,9 +2327,8 @@ _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
23352327
(__v8df)_mm512_setzero_pd());
23362328
}
23372329

2338-
static __inline __m512 __DEFAULT_FN_ATTRS512
2339-
_mm512_div_ps(__m512 __a, __m512 __b)
2340-
{
2330+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
2331+
_mm512_div_ps(__m512 __a, __m512 __b) {
23412332
return (__m512)((__v16sf)__a/(__v16sf)__b);
23422333
}
23432334

@@ -4123,9 +4114,8 @@ _mm512_cvtss_f32(__m512 __a)
41234114

41244115
/* Unpack and Interleave */
41254116

4126-
static __inline __m512d __DEFAULT_FN_ATTRS512
4127-
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4128-
{
4117+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4118+
_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
41294119
return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
41304120
1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
41314121
}
@@ -4146,9 +4136,8 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
41464136
(__v8df)_mm512_setzero_pd());
41474137
}
41484138

4149-
static __inline __m512d __DEFAULT_FN_ATTRS512
4150-
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4151-
{
4139+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4140+
_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
41524141
return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
41534142
0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
41544143
}
@@ -4169,9 +4158,8 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
41694158
(__v8df)_mm512_setzero_pd());
41704159
}
41714160

4172-
static __inline __m512 __DEFAULT_FN_ATTRS512
4173-
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4174-
{
4161+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
4162+
_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
41754163
return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
41764164
2, 18, 3, 19,
41774165
2+4, 18+4, 3+4, 19+4,
@@ -4195,9 +4183,8 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
41954183
(__v16sf)_mm512_setzero_ps());
41964184
}
41974185

4198-
static __inline __m512 __DEFAULT_FN_ATTRS512
4199-
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4200-
{
4186+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
4187+
_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
42014188
return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
42024189
0, 16, 1, 17,
42034190
0+4, 16+4, 1+4, 17+4,

clang/lib/Headers/avxintrin.h

Lines changed: 24 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,8 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
8787
/// A 256-bit vector of [4 x double] containing one of the source operands.
8888
/// \returns A 256-bit vector of [4 x double] containing the sums of both
8989
/// operands.
90-
static __inline __m256d __DEFAULT_FN_ATTRS
91-
_mm256_add_pd(__m256d __a, __m256d __b)
92-
{
90+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
91+
_mm256_add_pd(__m256d __a, __m256d __b) {
9392
return (__m256d)((__v4df)__a+(__v4df)__b);
9493
}
9594

@@ -105,9 +104,8 @@ _mm256_add_pd(__m256d __a, __m256d __b)
105104
/// A 256-bit vector of [8 x float] containing one of the source operands.
106105
/// \returns A 256-bit vector of [8 x float] containing the sums of both
107106
/// operands.
108-
static __inline __m256 __DEFAULT_FN_ATTRS
109-
_mm256_add_ps(__m256 __a, __m256 __b)
110-
{
107+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a,
108+
__m256 __b) {
111109
return (__m256)((__v8sf)__a+(__v8sf)__b);
112110
}
113111

@@ -123,9 +121,8 @@ _mm256_add_ps(__m256 __a, __m256 __b)
123121
/// A 256-bit vector of [4 x double] containing the subtrahend.
124122
/// \returns A 256-bit vector of [4 x double] containing the differences between
125123
/// both operands.
126-
static __inline __m256d __DEFAULT_FN_ATTRS
127-
_mm256_sub_pd(__m256d __a, __m256d __b)
128-
{
124+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
125+
_mm256_sub_pd(__m256d __a, __m256d __b) {
129126
return (__m256d)((__v4df)__a-(__v4df)__b);
130127
}
131128

@@ -141,9 +138,8 @@ _mm256_sub_pd(__m256d __a, __m256d __b)
141138
/// A 256-bit vector of [8 x float] containing the subtrahend.
142139
/// \returns A 256-bit vector of [8 x float] containing the differences between
143140
/// both operands.
144-
static __inline __m256 __DEFAULT_FN_ATTRS
145-
_mm256_sub_ps(__m256 __a, __m256 __b)
146-
{
141+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a,
142+
__m256 __b) {
147143
return (__m256)((__v8sf)__a-(__v8sf)__b);
148144
}
149145

@@ -197,9 +193,8 @@ _mm256_addsub_ps(__m256 __a, __m256 __b)
197193
/// A 256-bit vector of [4 x double] containing the divisor.
198194
/// \returns A 256-bit vector of [4 x double] containing the quotients of both
199195
/// operands.
200-
static __inline __m256d __DEFAULT_FN_ATTRS
201-
_mm256_div_pd(__m256d __a, __m256d __b)
202-
{
196+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
197+
_mm256_div_pd(__m256d __a, __m256d __b) {
203198
return (__m256d)((__v4df)__a/(__v4df)__b);
204199
}
205200

@@ -215,9 +210,8 @@ _mm256_div_pd(__m256d __a, __m256d __b)
215210
/// A 256-bit vector of [8 x float] containing the divisor.
216211
/// \returns A 256-bit vector of [8 x float] containing the quotients of both
217212
/// operands.
218-
static __inline __m256 __DEFAULT_FN_ATTRS
219-
_mm256_div_ps(__m256 __a, __m256 __b)
220-
{
213+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a,
214+
__m256 __b) {
221215
return (__m256)((__v8sf)__a/(__v8sf)__b);
222216
}
223217

@@ -317,9 +311,8 @@ _mm256_min_ps(__m256 __a, __m256 __b)
317311
/// A 256-bit vector of [4 x double] containing one of the operands.
318312
/// \returns A 256-bit vector of [4 x double] containing the products of both
319313
/// operands.
320-
static __inline __m256d __DEFAULT_FN_ATTRS
321-
_mm256_mul_pd(__m256d __a, __m256d __b)
322-
{
314+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
315+
_mm256_mul_pd(__m256d __a, __m256d __b) {
323316
return (__m256d)((__v4df)__a * (__v4df)__b);
324317
}
325318

@@ -335,9 +328,8 @@ _mm256_mul_pd(__m256d __a, __m256d __b)
335328
/// A 256-bit vector of [8 x float] containing one of the operands.
336329
/// \returns A 256-bit vector of [8 x float] containing the products of both
337330
/// operands.
338-
static __inline __m256 __DEFAULT_FN_ATTRS
339-
_mm256_mul_ps(__m256 __a, __m256 __b)
340-
{
331+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a,
332+
__m256 __b) {
341333
return (__m256)((__v8sf)__a * (__v8sf)__b);
342334
}
343335

@@ -2462,9 +2454,8 @@ _mm256_movedup_pd(__m256d __a)
24622454
/// Bits [127:64] are written to bits [127:64] of the return value. \n
24632455
/// Bits [255:192] are written to bits [255:192] of the return value. \n
24642456
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2465-
static __inline __m256d __DEFAULT_FN_ATTRS
2466-
_mm256_unpackhi_pd(__m256d __a, __m256d __b)
2467-
{
2457+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
2458+
_mm256_unpackhi_pd(__m256d __a, __m256d __b) {
24682459
return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
24692460
}
24702461

@@ -2484,9 +2475,8 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b)
24842475
/// Bits [63:0] are written to bits [127:64] of the return value. \n
24852476
/// Bits [191:128] are written to bits [255:192] of the return value. \n
24862477
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2487-
static __inline __m256d __DEFAULT_FN_ATTRS
2488-
_mm256_unpacklo_pd(__m256d __a, __m256d __b)
2489-
{
2478+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
2479+
_mm256_unpacklo_pd(__m256d __a, __m256d __b) {
24902480
return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
24912481
}
24922482

@@ -2511,9 +2501,8 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b)
25112501
/// Bits [223:192] are written to bits [191:160] of the return value. \n
25122502
/// Bits [255:224] are written to bits [255:224] of the return value.
25132503
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2514-
static __inline __m256 __DEFAULT_FN_ATTRS
2515-
_mm256_unpackhi_ps(__m256 __a, __m256 __b)
2516-
{
2504+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
2505+
_mm256_unpackhi_ps(__m256 __a, __m256 __b) {
25172506
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
25182507
}
25192508

@@ -2538,9 +2527,8 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b)
25382527
/// Bits [159:128] are written to bits [191:160] of the return value. \n
25392528
/// Bits [191:160] are written to bits [255:224] of the return value.
25402529
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2541-
static __inline __m256 __DEFAULT_FN_ATTRS
2542-
_mm256_unpacklo_ps(__m256 __a, __m256 __b)
2543-
{
2530+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
2531+
_mm256_unpacklo_ps(__m256 __a, __m256 __b) {
25442532
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
25452533
}
25462534

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ __m256d test_mm256_add_pd(__m256d A, __m256d B) {
2020
// CHECK: fadd <4 x double>
2121
return _mm256_add_pd(A, B);
2222
}
23+
TEST_CONSTEXPR(match_m256d( _mm256_add_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), -8.0, -10.0, +12.0, +14.0));
2324

2425
__m256 test_mm256_add_ps(__m256 A, __m256 B) {
2526
// CHECK-LABEL: test_mm256_add_ps
2627
// CHECK: fadd <8 x float>
2728
return _mm256_add_ps(A, B);
2829
}
30+
TEST_CONSTEXPR(match_m256(_mm256_add_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), -8.0f, -10.0f, +12.0f, +14.0f, +14.0f, +12.0f, -10.0f, -8.0f));
2931

3032
__m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
3133
// CHECK-LABEL: test_mm256_addsub_pd
@@ -977,12 +979,14 @@ __m256d test_mm256_div_pd(__m256d A, __m256d B) {
977979
// CHECK: fdiv <4 x double>
978980
return _mm256_div_pd(A, B);
979981
}
982+
TEST_CONSTEXPR(match_m256d( _mm256_div_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-1.0, +1.0, -1.0, +1.0}), +4.0, -5.0, -6.0, +7.0));
980983

981984
__m256 test_mm256_div_ps(__m256 A, __m256 B) {
982985
// CHECK-LABEL: test_mm256_div_ps
983986
// CHECK: fdiv <8 x float>
984987
return _mm256_div_ps(A, B);
985988
}
989+
TEST_CONSTEXPR(match_m256( _mm256_div_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-1.0f, +1.0f, -1.0f, +1.0f, +1.0f, -1.0f, +1.0f, -1.0f}), +4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, +4.0f));
986990

987991
__m256 test_mm256_dp_ps(__m256 A, __m256 B) {
988992
// CHECK-LABEL: test_mm256_dp_ps
@@ -1295,12 +1299,14 @@ __m256d test_mm256_mul_pd(__m256d A, __m256d B) {
12951299
// CHECK: fmul <4 x double>
12961300
return _mm256_mul_pd(A, B);
12971301
}
1302+
TEST_CONSTEXPR(match_m256d( _mm256_mul_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), +16.0, +25.0, +36.0, +49.0));
12981303

12991304
__m256 test_mm256_mul_ps(__m256 A, __m256 B) {
13001305
// CHECK-LABEL: test_mm256_mul_ps
13011306
// CHECK: fmul <8 x float>
13021307
return _mm256_mul_ps(A, B);
13031308
}
1309+
TEST_CONSTEXPR(match_m256( _mm256_mul_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), +16.0f, +25.0f, +36.0f, +49.0f, +49.0f, +36.0f, +25.0f, +16.0f));
13041310

13051311
__m256d test_mm256_or_pd(__m256d A, __m256d B) {
13061312
// CHECK-LABEL: test_mm256_or_pd
@@ -1933,12 +1939,14 @@ __m256d test_mm256_sub_pd(__m256d A, __m256d B) {
19331939
// CHECK: fsub <4 x double>
19341940
return _mm256_sub_pd(A, B);
19351941
}
1942+
TEST_CONSTEXPR(match_m256d( _mm256_sub_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-0.0, +0.0, +2.0, -1.0}), -4.0, -5.0, 4.0, 8.0));
19361943

19371944
__m256 test_mm256_sub_ps(__m256 A, __m256 B) {
19381945
// CHECK-LABEL: test_mm256_sub_ps
19391946
// CHECK: fsub <8 x float>
19401947
return _mm256_sub_ps(A, B);
19411948
}
1949+
TEST_CONSTEXPR(match_m256( _mm256_sub_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-0.0f, +0.0f, +2.0f, -1.0f, -1.0f, +2.0f, +0.0f, -0.0f}), -4.0f, -5.0f, 4.0f, 8.0f, 8.0f, 4.0f, -5.0f, -4.0f));
19421950

19431951
int test_mm_testc_pd(__m128d A, __m128d B) {
19441952
// CHECK-LABEL: test_mm_testc_pd
@@ -2062,24 +2070,28 @@ __m256d test_mm256_unpackhi_pd(__m256d A, __m256d B) {
20622070
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20632071
return _mm256_unpackhi_pd(A, B);
20642072
}
2073+
TEST_CONSTEXPR(match_m256d(_mm256_unpackhi_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +2.0, +6.0, +4.0, +8.0));
20652074

20662075
__m256 test_mm256_unpackhi_ps(__m256 A, __m256 B) {
20672076
// CHECK-LABEL: test_mm256_unpackhi_ps
20682077
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
20692078
return _mm256_unpackhi_ps(A, B);
20702079
}
2080+
TEST_CONSTEXPR(match_m256(_mm256_unpackhi_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +2.0f, +12.0f, +3.0f, +13.0f, +6.0f, +16.0f, +7.0f, +17.0f));
20712081

20722082
__m256d test_mm256_unpacklo_pd(__m256d A, __m256d B) {
20732083
// CHECK-LABEL: test_mm256_unpacklo_pd
20742084
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20752085
return _mm256_unpacklo_pd(A, B);
20762086
}
2087+
TEST_CONSTEXPR(match_m256d(_mm256_unpacklo_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +1.0, +5.0, +3.0, +7.0));
20772088

20782089
__m256 test_mm256_unpacklo_ps(__m256 A, __m256 B) {
20792090
// CHECK-LABEL: test_mm256_unpacklo_ps
20802091
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
20812092
return _mm256_unpacklo_ps(A, B);
20822093
}
2094+
TEST_CONSTEXPR(match_m256(_mm256_unpacklo_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +0.0f, +10.0f, +1.0f, +11.0f, +4.0f, +14.0f, +5.0f, +15.0f));
20832095

20842096
__m256d test_mm256_xor_pd(__m256d A, __m256d B) {
20852097
// CHECK-LABEL: test_mm256_xor_pd

0 commit comments

Comments
 (0)