Skip to content

Commit 00231ce

Browse files
committed
[X86][RFC] Refactor the SSE intrinsics constexpr tests to simplify future expansion
I'm hoping to make a large proportion of the SSE/AVX intrinsics usable in constant expressions - eventually anything that doesn't touch memory or system settings - making it much easier to utilise SSE/AVX intrinsics in various math libraries etc. My initial implementation placed the tests at the end of the test file, similar to how smaller files already handle their tests. However, what I'm finding is that this approach doesn't scale when trying to track coverage of so many intrinsics - many keep getting missed, and it gets messy; so what I'm proposing is to instead keep each intrinsic's generic IR test and its constexpr tests together to make them easier to track together, wrapping the static_assert inside a macro to disable on C and pre-C++11 tests. I'm open to alternative suggestions before I invest too much time getting this work done :)
1 parent cf5e295 commit 00231ce

File tree

3 files changed

+103
-219
lines changed

3 files changed

+103
-219
lines changed

clang/test/CodeGen/X86/sse-builtins.c

Lines changed: 43 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,23 @@
66

77
#include <immintrin.h>
88

9+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
10+
constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
11+
return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
12+
}
13+
#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
14+
#else
15+
#define TEST_CONSTEXPR(...)
16+
#endif
17+
918
// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
1019

1120
__m128 test_mm_add_ps(__m128 A, __m128 B) {
1221
// CHECK-LABEL: test_mm_add_ps
1322
// CHECK: fadd <4 x float>
1423
return _mm_add_ps(A, B);
1524
}
25+
TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));
1626

1727
__m128 test_mm_add_ss(__m128 A, __m128 B) {
1828
// CHECK-LABEL: test_mm_add_ss
@@ -22,19 +32,22 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) {
2232
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
2333
return _mm_add_ss(A, B);
2434
}
35+
TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));
2536

2637
__m128 test_mm_and_ps(__m128 A, __m128 B) {
2738
// CHECK-LABEL: test_mm_and_ps
2839
// CHECK: and <4 x i32>
2940
return _mm_and_ps(A, B);
3041
}
42+
TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -0.0f, -0.0f, +0.0f, +7.0f));
3143

3244
__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
3345
// CHECK-LABEL: test_mm_andnot_ps
3446
// CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
3547
// CHECK: and <4 x i32>
3648
return _mm_andnot_ps(A, B);
3749
}
50+
TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, +0.0f, +0.0f));
3851

3952
__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
4053
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
@@ -322,6 +335,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
322335
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
323336
return _mm_cvtsi32_ss(A, B);
324337
}
338+
TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));
339+
340+
__m128 test_mm_cvt_si2ss(__m128 A, int B) {
341+
// CHECK-LABEL: test_mm_cvt_si2ss
342+
// CHECK: sitofp i32 %{{.*}} to float
343+
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
344+
return _mm_cvt_si2ss(A, B);
345+
}
346+
TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));
325347

326348
#ifdef __x86_64__
327349
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
@@ -330,13 +352,15 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
330352
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
331353
return _mm_cvtsi64_ss(A, B);
332354
}
355+
TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
333356
#endif
334357

335358
float test_mm_cvtss_f32(__m128 A) {
336359
// CHECK-LABEL: test_mm_cvtss_f32
337360
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
338361
return _mm_cvtss_f32(A);
339362
}
363+
TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);
340364

341365
int test_mm_cvtss_si32(__m128 A) {
342366
// CHECK-LABEL: test_mm_cvtss_si32
@@ -377,6 +401,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) {
377401
// CHECK: fdiv <4 x float>
378402
return _mm_div_ps(A, B);
379403
}
404+
TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));
380405

381406
__m128 test_mm_div_ss(__m128 A, __m128 B) {
382407
// CHECK-LABEL: test_mm_div_ss
@@ -386,6 +411,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) {
386411
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
387412
return _mm_div_ss(A, B);
388413
}
414+
TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));
389415

390416
unsigned int test_MM_GET_EXCEPTION_MASK(void) {
391417
// CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
@@ -517,18 +543,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) {
517543
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
518544
return _mm_move_ss(A, B);
519545
}
546+
TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
520547

521548
__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
522549
// CHECK-LABEL: test_mm_movehl_ps
523550
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
524551
return _mm_movehl_ps(A, B);
525552
}
553+
TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));
526554

527555
__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
528556
// CHECK-LABEL: test_mm_movelh_ps
529557
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
530558
return _mm_movelh_ps(A, B);
531559
}
560+
TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));
532561

533562
int test_mm_movemask_ps(__m128 A) {
534563
// CHECK-LABEL: test_mm_movemask_ps
@@ -541,6 +570,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) {
541570
// CHECK: fmul <4 x float>
542571
return _mm_mul_ps(A, B);
543572
}
573+
TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));
544574

545575
__m128 test_mm_mul_ss(__m128 A, __m128 B) {
546576
// CHECK-LABEL: test_mm_mul_ss
@@ -550,12 +580,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) {
550580
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
551581
return _mm_mul_ss(A, B);
552582
}
583+
// Scalar multiply: lane 0 is A[0] * B[0]; lanes 1-3 are passed through from A.
TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
553584

554585
__m128 test_mm_or_ps(__m128 A, __m128 B) {
555586
// CHECK-LABEL: test_mm_or_ps
556587
// CHECK: or <4 x i32>
557588
return _mm_or_ps(A, B);
558589
}
590+
TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));
559591

560592
void test_mm_prefetch(char const* p) {
561593
// CHECK-LABEL: test_mm_prefetch
@@ -628,6 +660,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) {
628660
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
629661
return _mm_set_ps(A, B, C, D);
630662
}
663+
TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +.0f));
631664

632665
__m128 test_mm_set_ps1(float A) {
633666
// CHECK-LABEL: test_mm_set_ps1
@@ -637,6 +670,7 @@ __m128 test_mm_set_ps1(float A) {
637670
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
638671
return _mm_set_ps1(A);
639672
}
673+
TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));
640674

641675
void test_MM_SET_ROUNDING_MODE(unsigned int A) {
642676
// CHECK-LABEL: test_MM_SET_ROUNDING_MODE
@@ -657,6 +691,7 @@ __m128 test_mm_set_ss(float A) {
657691
// CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
658692
return _mm_set_ss(A);
659693
}
694+
TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));
660695

661696
__m128 test_mm_set1_ps(float A) {
662697
// CHECK-LABEL: test_mm_set1_ps
@@ -666,6 +701,7 @@ __m128 test_mm_set1_ps(float A) {
666701
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
667702
return _mm_set1_ps(A);
668703
}
704+
TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));
669705

670706
void test_mm_setcsr(unsigned int A) {
671707
// CHECK-LABEL: test_mm_setcsr
@@ -682,12 +718,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) {
682718
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
683719
return _mm_setr_ps(A, B, C, D);
684720
}
721+
TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));
685722

686723
__m128 test_mm_setzero_ps(void) {
687724
// CHECK-LABEL: test_mm_setzero_ps
688725
// CHECK: store <4 x float> zeroinitializer
689726
return _mm_setzero_ps();
690727
}
728+
TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));
691729

692730
void test_mm_sfence(void) {
693731
// CHECK-LABEL: test_mm_sfence
@@ -787,6 +825,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) {
787825
// CHECK: fsub <4 x float>
788826
return _mm_sub_ps(A, B);
789827
}
828+
TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));
790829

791830
__m128 test_mm_sub_ss(__m128 A, __m128 B) {
792831
// CHECK-LABEL: test_mm_sub_ss
@@ -796,6 +835,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) {
796835
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
797836
return _mm_sub_ss(A, B);
798837
}
838+
TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));
799839

800840
void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
801841
// CHECK-LABEL: test_MM_TRANSPOSE4_PS
@@ -857,107 +897,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
857897
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
858898
return _mm_unpackhi_ps(A, B);
859899
}
900+
TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));
860901

861902
__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
862903
// CHECK-LABEL: test_mm_unpacklo_ps
863904
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
864905
return _mm_unpacklo_ps(A, B);
865906
}
907+
TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));
866908

867909
__m128 test_mm_xor_ps(__m128 A, __m128 B) {
868910
// CHECK-LABEL: test_mm_xor_ps
869911
// CHECK: xor <4 x i32>
870912
return _mm_xor_ps(A, B);
871913
}
872-
873-
// Test constexpr handling.
874-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
875-
876-
void test_constexpr() {
877-
constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f};
878-
constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f};
879-
constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f};
880-
constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f};
881-
882-
constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f);
883-
static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f);
884-
885-
constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f);
886-
static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f);
887-
888-
constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f);
889-
static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f);
890-
891-
constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f);
892-
static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f);
893-
894-
constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f);
895-
static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f);
896-
897-
constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps();
898-
static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f);
899-
900-
constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2);
901-
static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f);
902-
903-
constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2);
904-
static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f);
905-
906-
constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2);
907-
static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f);
908-
909-
constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, k2);
910-
static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f);
911-
912-
constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2);
913-
static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f);
914-
915-
constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2);
916-
static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f);
917-
918-
constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2);
919-
static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f);
920-
921-
constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2);
922-
static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f);
923-
924-
constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4);
925-
static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f);
926-
927-
constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4);
928-
static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f);
929-
930-
constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4);
931-
static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f);
932-
933-
constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4);
934-
static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f);
935-
936-
constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2);
937-
static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f);
938-
939-
constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2);
940-
static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f);
941-
942-
constexpr __m128 v_mm_move_ss = _mm_move_ss(k1, k2);
943-
static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f);
944-
945-
constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2);
946-
static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f);
947-
948-
constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2);
949-
static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f);
950-
951-
constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42);
952-
static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f);
953-
954-
constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99);
955-
static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f);
956-
957-
constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555);
958-
static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f);
959-
960-
static_assert(_mm_cvtss_f32(k2) == +8.0f);
961-
}
962-
963-
#endif
914+
TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));

0 commit comments

Comments
 (0)