Skip to content

Commit 7127a51

Browse files
committed
[Headers][X86] Update AVX/AVX512 float/double add/sub/mul/div/unpck intrinsics to be used in constexpr
1 parent 281e6d2 commit 7127a51

File tree

4 files changed

+70
-71
lines changed

4 files changed

+70
-71
lines changed

clang/lib/Headers/avx512fintrin.h

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -835,45 +835,38 @@ _mm512_xor_si512(__m512i __a, __m512i __b)
835835

836836
/* Arithmetic */
837837

838-
static __inline __m512d __DEFAULT_FN_ATTRS512
839-
_mm512_add_pd(__m512d __a, __m512d __b)
840-
{
838+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
839+
_mm512_add_pd(__m512d __a, __m512d __b) {
841840
return (__m512d)((__v8df)__a + (__v8df)__b);
842841
}
843842

844-
static __inline __m512 __DEFAULT_FN_ATTRS512
845-
_mm512_add_ps(__m512 __a, __m512 __b)
846-
{
843+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
844+
_mm512_add_ps(__m512 __a, __m512 __b) {
847845
return (__m512)((__v16sf)__a + (__v16sf)__b);
848846
}
849847

850-
static __inline __m512d __DEFAULT_FN_ATTRS512
851-
_mm512_mul_pd(__m512d __a, __m512d __b)
852-
{
848+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
849+
_mm512_mul_pd(__m512d __a, __m512d __b) {
853850
return (__m512d)((__v8df)__a * (__v8df)__b);
854851
}
855852

856-
static __inline __m512 __DEFAULT_FN_ATTRS512
857-
_mm512_mul_ps(__m512 __a, __m512 __b)
858-
{
853+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
854+
_mm512_mul_ps(__m512 __a, __m512 __b) {
859855
return (__m512)((__v16sf)__a * (__v16sf)__b);
860856
}
861857

862-
static __inline __m512d __DEFAULT_FN_ATTRS512
863-
_mm512_sub_pd(__m512d __a, __m512d __b)
864-
{
858+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
859+
_mm512_sub_pd(__m512d __a, __m512d __b) {
865860
return (__m512d)((__v8df)__a - (__v8df)__b);
866861
}
867862

868-
static __inline __m512 __DEFAULT_FN_ATTRS512
869-
_mm512_sub_ps(__m512 __a, __m512 __b)
870-
{
863+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
864+
_mm512_sub_ps(__m512 __a, __m512 __b) {
871865
return (__m512)((__v16sf)__a - (__v16sf)__b);
872866
}
873867

874-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
875-
_mm512_add_epi64 (__m512i __A, __m512i __B)
876-
{
868+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
869+
_mm512_add_epi64(__m512i __A, __m512i __B) {
877870
return (__m512i) ((__v8du) __A + (__v8du) __B);
878871
}
879872

@@ -2315,7 +2308,7 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
23152308
(__v2df)_mm_setzero_pd(), \
23162309
(__mmask8)(U), (int)(R)))
23172310

2318-
static __inline __m512d __DEFAULT_FN_ATTRS512
2311+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
23192312
_mm512_div_pd(__m512d __a, __m512d __b)
23202313
{
23212314
return (__m512d)((__v8df)__a/(__v8df)__b);
@@ -2335,7 +2328,7 @@ _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
23352328
(__v8df)_mm512_setzero_pd());
23362329
}
23372330

2338-
static __inline __m512 __DEFAULT_FN_ATTRS512
2331+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
23392332
_mm512_div_ps(__m512 __a, __m512 __b)
23402333
{
23412334
return (__m512)((__v16sf)__a/(__v16sf)__b);
@@ -4123,9 +4116,8 @@ _mm512_cvtss_f32(__m512 __a)
41234116

41244117
/* Unpack and Interleave */
41254118

4126-
static __inline __m512d __DEFAULT_FN_ATTRS512
4127-
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4128-
{
4119+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4120+
_mm512_unpackhi_pd(__m512d __a, __m512d __b) {
41294121
return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
41304122
1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
41314123
}
@@ -4146,9 +4138,8 @@ _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
41464138
(__v8df)_mm512_setzero_pd());
41474139
}
41484140

4149-
static __inline __m512d __DEFAULT_FN_ATTRS512
4150-
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4151-
{
4141+
static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
4142+
_mm512_unpacklo_pd(__m512d __a, __m512d __b) {
41524143
return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
41534144
0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
41544145
}
@@ -4169,9 +4160,8 @@ _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
41694160
(__v8df)_mm512_setzero_pd());
41704161
}
41714162

4172-
static __inline __m512 __DEFAULT_FN_ATTRS512
4173-
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4174-
{
4163+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
4164+
_mm512_unpackhi_ps(__m512 __a, __m512 __b) {
41754165
return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
41764166
2, 18, 3, 19,
41774167
2+4, 18+4, 3+4, 19+4,
@@ -4195,9 +4185,8 @@ _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
41954185
(__v16sf)_mm512_setzero_ps());
41964186
}
41974187

4198-
static __inline __m512 __DEFAULT_FN_ATTRS512
4199-
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4200-
{
4188+
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
4189+
_mm512_unpacklo_ps(__m512 __a, __m512 __b) {
42014190
return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
42024191
0, 16, 1, 17,
42034192
0+4, 16+4, 1+4, 17+4,

clang/lib/Headers/avxintrin.h

Lines changed: 24 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,8 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
8787
/// A 256-bit vector of [4 x double] containing one of the source operands.
8888
/// \returns A 256-bit vector of [4 x double] containing the sums of both
8989
/// operands.
90-
static __inline __m256d __DEFAULT_FN_ATTRS
91-
_mm256_add_pd(__m256d __a, __m256d __b)
92-
{
90+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
91+
_mm256_add_pd(__m256d __a, __m256d __b) {
9392
return (__m256d)((__v4df)__a+(__v4df)__b);
9493
}
9594

@@ -105,9 +104,8 @@ _mm256_add_pd(__m256d __a, __m256d __b)
105104
/// A 256-bit vector of [8 x float] containing one of the source operands.
106105
/// \returns A 256-bit vector of [8 x float] containing the sums of both
107106
/// operands.
108-
static __inline __m256 __DEFAULT_FN_ATTRS
109-
_mm256_add_ps(__m256 __a, __m256 __b)
110-
{
107+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_add_ps(__m256 __a,
108+
__m256 __b) {
111109
return (__m256)((__v8sf)__a+(__v8sf)__b);
112110
}
113111

@@ -123,9 +121,8 @@ _mm256_add_ps(__m256 __a, __m256 __b)
123121
/// A 256-bit vector of [4 x double] containing the subtrahend.
124122
/// \returns A 256-bit vector of [4 x double] containing the differences between
125123
/// both operands.
126-
static __inline __m256d __DEFAULT_FN_ATTRS
127-
_mm256_sub_pd(__m256d __a, __m256d __b)
128-
{
124+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
125+
_mm256_sub_pd(__m256d __a, __m256d __b) {
129126
return (__m256d)((__v4df)__a-(__v4df)__b);
130127
}
131128

@@ -141,9 +138,8 @@ _mm256_sub_pd(__m256d __a, __m256d __b)
141138
/// A 256-bit vector of [8 x float] containing the subtrahend.
142139
/// \returns A 256-bit vector of [8 x float] containing the differences between
143140
/// both operands.
144-
static __inline __m256 __DEFAULT_FN_ATTRS
145-
_mm256_sub_ps(__m256 __a, __m256 __b)
146-
{
141+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a,
142+
__m256 __b) {
147143
return (__m256)((__v8sf)__a-(__v8sf)__b);
148144
}
149145

@@ -197,9 +193,8 @@ _mm256_addsub_ps(__m256 __a, __m256 __b)
197193
/// A 256-bit vector of [4 x double] containing the divisor.
198194
/// \returns A 256-bit vector of [4 x double] containing the quotients of both
199195
/// operands.
200-
static __inline __m256d __DEFAULT_FN_ATTRS
201-
_mm256_div_pd(__m256d __a, __m256d __b)
202-
{
196+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
197+
_mm256_div_pd(__m256d __a, __m256d __b) {
203198
return (__m256d)((__v4df)__a/(__v4df)__b);
204199
}
205200

@@ -215,9 +210,8 @@ _mm256_div_pd(__m256d __a, __m256d __b)
215210
/// A 256-bit vector of [8 x float] containing the divisor.
216211
/// \returns A 256-bit vector of [8 x float] containing the quotients of both
217212
/// operands.
218-
static __inline __m256 __DEFAULT_FN_ATTRS
219-
_mm256_div_ps(__m256 __a, __m256 __b)
220-
{
213+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_div_ps(__m256 __a,
214+
__m256 __b) {
221215
return (__m256)((__v8sf)__a/(__v8sf)__b);
222216
}
223217

@@ -317,9 +311,8 @@ _mm256_min_ps(__m256 __a, __m256 __b)
317311
/// A 256-bit vector of [4 x double] containing one of the operands.
318312
/// \returns A 256-bit vector of [4 x double] containing the products of both
319313
/// operands.
320-
static __inline __m256d __DEFAULT_FN_ATTRS
321-
_mm256_mul_pd(__m256d __a, __m256d __b)
322-
{
314+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
315+
_mm256_mul_pd(__m256d __a, __m256d __b) {
323316
return (__m256d)((__v4df)__a * (__v4df)__b);
324317
}
325318

@@ -335,9 +328,8 @@ _mm256_mul_pd(__m256d __a, __m256d __b)
335328
/// A 256-bit vector of [8 x float] containing one of the operands.
336329
/// \returns A 256-bit vector of [8 x float] containing the products of both
337330
/// operands.
338-
static __inline __m256 __DEFAULT_FN_ATTRS
339-
_mm256_mul_ps(__m256 __a, __m256 __b)
340-
{
331+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_mul_ps(__m256 __a,
332+
__m256 __b) {
341333
return (__m256)((__v8sf)__a * (__v8sf)__b);
342334
}
343335

@@ -2462,9 +2454,8 @@ _mm256_movedup_pd(__m256d __a)
24622454
/// Bits [127:64] are written to bits [127:64] of the return value. \n
24632455
/// Bits [255:192] are written to bits [255:192] of the return value. \n
24642456
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2465-
static __inline __m256d __DEFAULT_FN_ATTRS
2466-
_mm256_unpackhi_pd(__m256d __a, __m256d __b)
2467-
{
2457+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
2458+
_mm256_unpackhi_pd(__m256d __a, __m256d __b) {
24682459
return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);
24692460
}
24702461

@@ -2484,9 +2475,8 @@ _mm256_unpackhi_pd(__m256d __a, __m256d __b)
24842475
/// Bits [63:0] are written to bits [127:64] of the return value. \n
24852476
/// Bits [191:128] are written to bits [255:192] of the return value. \n
24862477
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
2487-
static __inline __m256d __DEFAULT_FN_ATTRS
2488-
_mm256_unpacklo_pd(__m256d __a, __m256d __b)
2489-
{
2478+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
2479+
_mm256_unpacklo_pd(__m256d __a, __m256d __b) {
24902480
return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);
24912481
}
24922482

@@ -2511,9 +2501,8 @@ _mm256_unpacklo_pd(__m256d __a, __m256d __b)
25112501
/// Bits [223:192] are written to bits [191:160] of the return value. \n
25122502
/// Bits [255:224] are written to bits [255:224] of the return value.
25132503
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2514-
static __inline __m256 __DEFAULT_FN_ATTRS
2515-
_mm256_unpackhi_ps(__m256 __a, __m256 __b)
2516-
{
2504+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
2505+
_mm256_unpackhi_ps(__m256 __a, __m256 __b) {
25172506
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
25182507
}
25192508

@@ -2538,9 +2527,8 @@ _mm256_unpackhi_ps(__m256 __a, __m256 __b)
25382527
/// Bits [159:128] are written to bits [191:160] of the return value. \n
25392528
/// Bits [191:160] are written to bits [255:224] of the return value.
25402529
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
2541-
static __inline __m256 __DEFAULT_FN_ATTRS
2542-
_mm256_unpacklo_ps(__m256 __a, __m256 __b)
2543-
{
2530+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
2531+
_mm256_unpacklo_ps(__m256 __a, __m256 __b) {
25442532
return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
25452533
}
25462534

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ __m256d test_mm256_add_pd(__m256d A, __m256d B) {
2020
// CHECK: fadd <4 x double>
2121
return _mm256_add_pd(A, B);
2222
}
23+
TEST_CONSTEXPR(match_m256d( _mm256_add_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), -8.0, -10.0, +12.0, +14.0));
2324

2425
__m256 test_mm256_add_ps(__m256 A, __m256 B) {
2526
// CHECK-LABEL: test_mm256_add_ps
2627
// CHECK: fadd <8 x float>
2728
return _mm256_add_ps(A, B);
2829
}
30+
TEST_CONSTEXPR(match_m256(_mm256_add_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), -8.0f, -10.0f, +12.0f, +14.0f, +14.0f, +12.0f, -10.0f, -8.0f));
2931

3032
__m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
3133
// CHECK-LABEL: test_mm256_addsub_pd
@@ -973,12 +975,14 @@ __m256d test_mm256_div_pd(__m256d A, __m256d B) {
973975
// CHECK: fdiv <4 x double>
974976
return _mm256_div_pd(A, B);
975977
}
978+
TEST_CONSTEXPR(match_m256d( _mm256_div_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-1.0, +1.0, -1.0, +1.0}), +4.0, -5.0, -6.0, +7.0));
976979

977980
__m256 test_mm256_div_ps(__m256 A, __m256 B) {
978981
// CHECK-LABEL: test_mm256_div_ps
979982
// CHECK: fdiv <8 x float>
980983
return _mm256_div_ps(A, B);
981984
}
985+
TEST_CONSTEXPR(match_m256( _mm256_div_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-1.0f, +1.0f, -1.0f, +1.0f, +1.0f, -1.0f, +1.0f, -1.0f}), +4.0f, -5.0f, -6.0f, +7.0f, +7.0f, -6.0f, -5.0f, +4.0f));
982986

983987
__m256 test_mm256_dp_ps(__m256 A, __m256 B) {
984988
// CHECK-LABEL: test_mm256_dp_ps
@@ -1288,12 +1292,14 @@ __m256d test_mm256_mul_pd(__m256d A, __m256d B) {
12881292
// CHECK: fmul <4 x double>
12891293
return _mm256_mul_pd(A, B);
12901294
}
1295+
TEST_CONSTEXPR(match_m256d( _mm256_mul_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-4.0, -5.0, +6.0, +7.0}), +16.0, +25.0, +36.0, +49.0));
12911296

12921297
__m256 test_mm256_mul_ps(__m256 A, __m256 B) {
12931298
// CHECK-LABEL: test_mm256_mul_ps
12941299
// CHECK: fmul <8 x float>
12951300
return _mm256_mul_ps(A, B);
12961301
}
1302+
TEST_CONSTEXPR(match_m256( _mm256_mul_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}), +16.0f, +25.0f, +36.0f, +49.0f, +49.0f, +36.0f, +25.0f, +16.0f));
12971303

12981304
__m256d test_mm256_or_pd(__m256d A, __m256d B) {
12991305
// CHECK-LABEL: test_mm256_or_pd
@@ -1924,12 +1930,14 @@ __m256d test_mm256_sub_pd(__m256d A, __m256d B) {
19241930
// CHECK: fsub <4 x double>
19251931
return _mm256_sub_pd(A, B);
19261932
}
1933+
TEST_CONSTEXPR(match_m256d( _mm256_sub_pd((__m256d){-4.0, -5.0, +6.0, +7.0}, (__m256d){-0.0, +0.0, +2.0, -1.0}), -4.0, -5.0, 4.0, 8.0));
19271934

19281935
__m256 test_mm256_sub_ps(__m256 A, __m256 B) {
19291936
// CHECK-LABEL: test_mm256_sub_ps
19301937
// CHECK: fsub <8 x float>
19311938
return _mm256_sub_ps(A, B);
19321939
}
1940+
TEST_CONSTEXPR(match_m256( _mm256_sub_ps((__m256){-4.0f, -5.0f, +6.0f, +7.0f, +7.0f, +6.0f, -5.0f, -4.0f}, (__m256){-0.0f, +0.0f, +2.0f, -1.0f, -1.0f, +2.0f, +0.0f, -0.0f}), -4.0f, -5.0f, 4.0f, 8.0f, 8.0f, 4.0f, -5.0f, -4.0f));
19331941

19341942
int test_mm_testc_pd(__m128d A, __m128d B) {
19351943
// CHECK-LABEL: test_mm_testc_pd
@@ -2053,24 +2061,28 @@ __m256d test_mm256_unpackhi_pd(__m256d A, __m256d B) {
20532061
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20542062
return _mm256_unpackhi_pd(A, B);
20552063
}
2064+
// Shuffle mask <1, 5, 3, 7> interleaves the odd (high) element of each 128-bit lane: A[1], B[1], A[3], B[3].
TEST_CONSTEXPR(match_m256d(_mm256_unpackhi_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +2.0, +6.0, +4.0, +8.0));
20562065

20572066
__m256 test_mm256_unpackhi_ps(__m256 A, __m256 B) {
20582067
// CHECK-LABEL: test_mm256_unpackhi_ps
20592068
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
20602069
return _mm256_unpackhi_ps(A, B);
20612070
}
2071+
TEST_CONSTEXPR(match_m256(_mm256_unpackhi_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +2.0f, +12.0f, +3.0f, +13.0f, +6.0f, +16.0f, +7.0f, +17.0f));
20622072

20632073
__m256d test_mm256_unpacklo_pd(__m256d A, __m256d B) {
20642074
// CHECK-LABEL: test_mm256_unpacklo_pd
20652075
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20662076
return _mm256_unpacklo_pd(A, B);
20672077
}
2078+
// Shuffle mask <0, 4, 2, 6> interleaves the even (low) element of each 128-bit lane: A[0], B[0], A[2], B[2].
TEST_CONSTEXPR(match_m256d(_mm256_unpacklo_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+5.0, +6.0, +7.0, +8.0}), +1.0, +5.0, +3.0, +7.0));
20682079

20692080
__m256 test_mm256_unpacklo_ps(__m256 A, __m256 B) {
20702081
// CHECK-LABEL: test_mm256_unpacklo_ps
20712082
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
20722083
return _mm256_unpacklo_ps(A, B);
20732084
}
2085+
TEST_CONSTEXPR(match_m256(_mm256_unpacklo_ps((__m256){+0.0f, +1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f}, (__m256){+10.0f, +11.0f, +12.0f, +13.0f, +14.0f, +15.0f, +16.0f, +17.0f}), +0.0f, +10.0f, +1.0f, +11.0f, +4.0f, +14.0f, +5.0f, +15.0f));
20742086

20752087
__m256d test_mm256_xor_pd(__m256d A, __m256d B) {
20762088
// CHECK-LABEL: test_mm256_xor_pd

0 commit comments

Comments
 (0)