Skip to content

Commit f537792

Browse files
authored
[X86] Refactor the SSE intrinsics constexpr tests to simplify future expansion (llvm#112578)
I'm hoping to make a large proportion of the SSE/AVX intrinsics usable in constant expressions - eventually anything that doesn't touch memory or system settings - making it much easier to utilize SSE/AVX intrinsics in various math libraries etc. My initial implementation placed the tests at the end of the test file, similar to how smaller files already handle their tests. However, I'm finding that this approach doesn't scale when trying to track coverage of so many intrinsics: many keep getting missed, and it gets messy. What I'm proposing instead is to keep each intrinsic's generic IR test and its constexpr tests next to each other so they are easier to track together, wrapping the static_assert inside a macro that is disabled for C and pre-C++11 tests. I'm open to alternative suggestions before I invest too much time getting this work done :)
1 parent 32aa782 commit f537792

File tree

4 files changed

+95
-219
lines changed

4 files changed

+95
-219
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/* Helper methods for builtin intrinsic tests */
2+
3+
#include <immintrin.h>
4+
5+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
6+
7+
constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
8+
return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
9+
}
10+
11+
constexpr bool match_m128d(__m128d v, double x, double y) {
12+
return v[0] == x && v[1] == y;
13+
}
14+
15+
constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y) {
16+
return v[0] == x && v[1] == y;
17+
}
18+
19+
// Compile-time check used next to each intrinsic's IR test; the #else branch
// below defines it as empty so the checks vanish for C and pre-C++11 builds.
// The stringified condition is passed as the message because a message-less
// static_assert is only standard from C++17, while this branch is enabled
// from C++11 onwards.
#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__, #__VA_ARGS__)
20+
21+
#else
22+
23+
#define TEST_CONSTEXPR(...)
24+
25+
#endif

clang/test/CodeGen/X86/sse-builtins.c

Lines changed: 35 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
#include <immintrin.h>
8+
#include "builtin_test_helpers.h"
89

910
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
1011

@@ -13,6 +14,7 @@ __m128 test_mm_add_ps(__m128 A, __m128 B) {
1314
// CHECK: fadd <4 x float>
1415
return _mm_add_ps(A, B);
1516
}
17+
TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));
1618

1719
__m128 test_mm_add_ss(__m128 A, __m128 B) {
1820
// CHECK-LABEL: test_mm_add_ss
@@ -22,19 +24,22 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) {
2224
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
2325
return _mm_add_ss(A, B);
2426
}
27+
TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));
2528

2629
__m128 test_mm_and_ps(__m128 A, __m128 B) {
2730
// CHECK-LABEL: test_mm_and_ps
2831
// CHECK: and <4 x i32>
2932
return _mm_and_ps(A, B);
3033
}
34+
// match_m128 compares lanes with ==, so the sign of a zero lane is not
// actually checked; the expected values are nevertheless written with the sign
// the bitwise AND produces (-4.0f & +0.0f has a clear sign bit, i.e. +0.0f).
TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, -0.0f, +0.0f, +7.0f));
3135

3236
__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
3337
// CHECK-LABEL: test_mm_andnot_ps
3438
// CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
3539
// CHECK: and <4 x i32>
3640
return _mm_andnot_ps(A, B);
3741
}
42+
// The result is (~A) & B per lane. match_m128 compares with ==, so zero signs
// are not checked, but the expectations are written bit-accurately: only lane 2
// keeps B's sign bit (~(+6.0f) has the sign bit set, B is -0.0f), giving -0.0f.
TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, -0.0f, +0.0f));
3843

3944
__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
4045
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
@@ -322,6 +327,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
322327
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
323328
return _mm_cvtsi32_ss(A, B);
324329
}
330+
TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));
331+
332+
__m128 test_mm_cvt_si2ss(__m128 A, int B) {
333+
// CHECK-LABEL: test_mm_cvt_si2ss
334+
// CHECK: sitofp i32 %{{.*}} to float
335+
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
336+
return _mm_cvt_si2ss(A, B);
337+
}
338+
TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));
325339

326340
#ifdef __x86_64__
327341
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
@@ -330,13 +344,15 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
330344
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
331345
return _mm_cvtsi64_ss(A, B);
332346
}
347+
TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
333348
#endif
334349

335350
float test_mm_cvtss_f32(__m128 A) {
336351
// CHECK-LABEL: test_mm_cvtss_f32
337352
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
338353
return _mm_cvtss_f32(A);
339354
}
355+
TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);
340356

341357
int test_mm_cvtss_si32(__m128 A) {
342358
// CHECK-LABEL: test_mm_cvtss_si32
@@ -377,6 +393,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) {
377393
// CHECK: fdiv <4 x float>
378394
return _mm_div_ps(A, B);
379395
}
396+
TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));
380397

381398
__m128 test_mm_div_ss(__m128 A, __m128 B) {
382399
// CHECK-LABEL: test_mm_div_ss
@@ -386,6 +403,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) {
386403
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
387404
return _mm_div_ss(A, B);
388405
}
406+
TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));
389407

390408
unsigned int test_MM_GET_EXCEPTION_MASK(void) {
391409
// CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
@@ -517,18 +535,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) {
517535
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
518536
return _mm_move_ss(A, B);
519537
}
538+
TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
520539

521540
__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
522541
// CHECK-LABEL: test_mm_movehl_ps
523542
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
524543
return _mm_movehl_ps(A, B);
525544
}
545+
TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));
526546

527547
__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
528548
// CHECK-LABEL: test_mm_movelh_ps
529549
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
530550
return _mm_movelh_ps(A, B);
531551
}
552+
TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));
532553

533554
int test_mm_movemask_ps(__m128 A) {
534555
// CHECK-LABEL: test_mm_movemask_ps
@@ -541,6 +562,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) {
541562
// CHECK: fmul <4 x float>
542563
return _mm_mul_ps(A, B);
543564
}
565+
TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));
544566

545567
__m128 test_mm_mul_ss(__m128 A, __m128 B) {
546568
// CHECK-LABEL: test_mm_mul_ss
@@ -550,12 +572,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) {
550572
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
551573
return _mm_mul_ss(A, B);
552574
}
575+
// Scalar multiply: only lane 0 is A[0] * B[0]; lanes 1-3 are copied from A.
TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
553576

554577
__m128 test_mm_or_ps(__m128 A, __m128 B) {
555578
// CHECK-LABEL: test_mm_or_ps
556579
// CHECK: or <4 x i32>
557580
return _mm_or_ps(A, B);
558581
}
582+
TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));
559583

560584
void test_mm_prefetch(char const* p) {
561585
// CHECK-LABEL: test_mm_prefetch
@@ -628,6 +652,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) {
628652
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
629653
return _mm_set_ps(A, B, C, D);
630654
}
655+
// _mm_set_ps takes its arguments highest lane first, so the expected lanes
// appear in the reverse order of the arguments.
TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +0.0f));
631656

632657
__m128 test_mm_set_ps1(float A) {
633658
// CHECK-LABEL: test_mm_set_ps1
@@ -637,6 +662,7 @@ __m128 test_mm_set_ps1(float A) {
637662
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
638663
return _mm_set_ps1(A);
639664
}
665+
TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));
640666

641667
void test_MM_SET_ROUNDING_MODE(unsigned int A) {
642668
// CHECK-LABEL: test_MM_SET_ROUNDING_MODE
@@ -657,6 +683,7 @@ __m128 test_mm_set_ss(float A) {
657683
// CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
658684
return _mm_set_ss(A);
659685
}
686+
TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));
660687

661688
__m128 test_mm_set1_ps(float A) {
662689
// CHECK-LABEL: test_mm_set1_ps
@@ -666,6 +693,7 @@ __m128 test_mm_set1_ps(float A) {
666693
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
667694
return _mm_set1_ps(A);
668695
}
696+
TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));
669697

670698
void test_mm_setcsr(unsigned int A) {
671699
// CHECK-LABEL: test_mm_setcsr
@@ -682,12 +710,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) {
682710
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
683711
return _mm_setr_ps(A, B, C, D);
684712
}
713+
TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));
685714

686715
__m128 test_mm_setzero_ps(void) {
687716
// CHECK-LABEL: test_mm_setzero_ps
688717
// CHECK: store <4 x float> zeroinitializer
689718
return _mm_setzero_ps();
690719
}
720+
TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));
691721

692722
void test_mm_sfence(void) {
693723
// CHECK-LABEL: test_mm_sfence
@@ -787,6 +817,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) {
787817
// CHECK: fsub <4 x float>
788818
return _mm_sub_ps(A, B);
789819
}
820+
TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));
790821

791822
__m128 test_mm_sub_ss(__m128 A, __m128 B) {
792823
// CHECK-LABEL: test_mm_sub_ss
@@ -796,6 +827,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) {
796827
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
797828
return _mm_sub_ss(A, B);
798829
}
830+
TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));
799831

800832
void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
801833
// CHECK-LABEL: test_MM_TRANSPOSE4_PS
@@ -857,107 +889,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
857889
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
858890
return _mm_unpackhi_ps(A, B);
859891
}
892+
TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));
860893

861894
__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
862895
// CHECK-LABEL: test_mm_unpacklo_ps
863896
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
864897
return _mm_unpacklo_ps(A, B);
865898
}
899+
TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));
866900

867901
__m128 test_mm_xor_ps(__m128 A, __m128 B) {
868902
// CHECK-LABEL: test_mm_xor_ps
869903
// CHECK: xor <4 x i32>
870904
return _mm_xor_ps(A, B);
871905
}
872-
873-
// Test constexpr handling.
874-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
875-
876-
void test_constexpr() {
877-
constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f};
878-
constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f};
879-
constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f};
880-
constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f};
881-
882-
constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f);
883-
static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f);
884-
885-
constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f);
886-
static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f);
887-
888-
constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f);
889-
static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f);
890-
891-
constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f);
892-
static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f);
893-
894-
constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f);
895-
static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f);
896-
897-
constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps();
898-
static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f);
899-
900-
constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2);
901-
static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f);
902-
903-
constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2);
904-
static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f);
905-
906-
constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2);
907-
static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f);
908-
909-
constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, k2);
910-
static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f);
911-
912-
constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2);
913-
static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f);
914-
915-
constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2);
916-
static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f);
917-
918-
constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2);
919-
static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f);
920-
921-
constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2);
922-
static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f);
923-
924-
constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4);
925-
static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f);
926-
927-
constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4);
928-
static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f);
929-
930-
constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4);
931-
static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f);
932-
933-
constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4);
934-
static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f);
935-
936-
constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2);
937-
static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f);
938-
939-
constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2);
940-
static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f);
941-
942-
constexpr __m128 v_mm_move_ss = _mm_move_ss(k1, k2);
943-
static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f);
944-
945-
constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2);
946-
static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f);
947-
948-
constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2);
949-
static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f);
950-
951-
constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42);
952-
static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f);
953-
954-
constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99);
955-
static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f);
956-
957-
constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555);
958-
static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f);
959-
960-
static_assert(_mm_cvtss_f32(k2) == +8.0f);
961-
}
962-
963-
#endif
906+
TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));

0 commit comments

Comments
 (0)