Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions clang/test/CodeGen/X86/builtin_test_helpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/* Helper methods for builtin intrinsic tests */

#include <immintrin.h>

#if defined(__cplusplus) && (__cplusplus >= 201103L)

// Returns true when each of the four elements of v compares equal to the
// corresponding expected value (v[0] to x, v[1] to y, v[2] to z, v[3] to w).
// The comparison is a plain ==, so for floating-point elements an expected
// +0.0f also matches -0.0f, and a NaN element never matches.
constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
}

// Returns true when both elements of v compare equal to the expected values
// (v[0] to x, v[1] to y). The comparison is a plain ==, so for floating-point
// elements +0.0 and -0.0 are treated as equal and a NaN element never matches.
constexpr bool match_m128d(__m128d v, double x, double y) {
return v[0] == x && v[1] == y;
}

// Returns true when elements 0 and 1 of v compare equal to x and y.
// NOTE(review): x and y are unsigned long long; if the elements of __m128i are
// signed 64-bit integers, each comparison goes through the usual arithmetic
// conversions first - confirm this is the intended way to spell expected values.
constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y) {
return v[0] == x && v[1] == y;
}

#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)

#else

#define TEST_CONSTEXPR(...)

#endif
127 changes: 35 additions & 92 deletions clang/test/CodeGen/X86/sse-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


#include <immintrin.h>
#include "builtin_test_helpers.h"

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

Expand All @@ -13,6 +14,7 @@ __m128 test_mm_add_ps(__m128 A, __m128 B) {
// CHECK: fadd <4 x float>
return _mm_add_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));

__m128 test_mm_add_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_add_ss
Expand All @@ -22,19 +24,22 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_add_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));

// Wraps _mm_and_ps so the check lines below can match its lowering:
// an `and` on <4 x i32> is expected.
__m128 test_mm_and_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_and_ps
// CHECK: and <4 x i32>
return _mm_and_ps(A, B);
}
// Constant-evaluation check for _mm_and_ps. The AND is bitwise, so only lane 1
// (-5.0f & -0.0f) keeps a set sign bit; lane 0 (-4.0f & +0.0f) is +0.0f.
// match_m128 compares with ==, which cannot tell the two zeros apart, but the
// expected values are spelled with their exact signs.
TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, -0.0f, +0.0f, +7.0f));

// Wraps _mm_andnot_ps so the check lines below can match its lowering:
// an `xor` of a <4 x i32> value with all-ones (i.e. a bitwise NOT), followed
// by an `and` on <4 x i32>.
__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_andnot_ps
// CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: and <4 x i32>
return _mm_andnot_ps(A, B);
}
// Constant-evaluation check for _mm_andnot_ps, which computes (~A & B) on the
// bit patterns. Only lane 2 (~(+6.0f) & -0.0f) leaves the sign bit set, so the
// exact result there is -0.0f. match_m128 compares with ==, which cannot tell
// the two zeros apart, but the expected values are spelled with their exact signs.
TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, -0.0f, +0.0f));

__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
// CHECK-LABEL: test_mm_cmp_ps_eq_oq
Expand Down Expand Up @@ -322,6 +327,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvtsi32_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));

// Wraps _mm_cvt_si2ss so the check lines below can match its lowering:
// a signed i32-to-float conversion whose result is inserted at element 0
// of a <4 x float>.
__m128 test_mm_cvt_si2ss(__m128 A, int B) {
// CHECK-LABEL: test_mm_cvt_si2ss
// CHECK: sitofp i32 %{{.*}} to float
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvt_si2ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));

#ifdef __x86_64__
__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
Expand All @@ -330,13 +344,15 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvtsi64_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
#endif

// Wraps _mm_cvtss_f32 so the check lines below can match its lowering:
// an extraction of element 0 from a <4 x float>.
float test_mm_cvtss_f32(__m128 A) {
// CHECK-LABEL: test_mm_cvtss_f32
// CHECK: extractelement <4 x float> %{{.*}}, i32 0
return _mm_cvtss_f32(A);
}
TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);

int test_mm_cvtss_si32(__m128 A) {
// CHECK-LABEL: test_mm_cvtss_si32
Expand Down Expand Up @@ -377,6 +393,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) {
// CHECK: fdiv <4 x float>
return _mm_div_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));

__m128 test_mm_div_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_div_ss
Expand All @@ -386,6 +403,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_div_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));

unsigned int test_MM_GET_EXCEPTION_MASK(void) {
// CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
Expand Down Expand Up @@ -517,18 +535,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_move_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));

// Wraps _mm_movehl_ps so the check line below can match its lowering:
// a shufflevector of two <4 x float> values with mask <6, 7, 2, 3>.
__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_movehl_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
return _mm_movehl_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));

// Wraps _mm_movelh_ps so the check line below can match its lowering:
// a shufflevector of two <4 x float> values with mask <0, 1, 4, 5>.
__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_movelh_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm_movelh_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));

int test_mm_movemask_ps(__m128 A) {
// CHECK-LABEL: test_mm_movemask_ps
Expand All @@ -541,6 +562,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) {
// CHECK: fmul <4 x float>
return _mm_mul_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_mul_ss
Expand All @@ -550,12 +572,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_mul_ss(A, B);
}
// Constant-evaluation check for _mm_mul_ss: only element 0 is multiplied
// (1.0f * 8.0f); elements 1-3 are carried over from the first operand.
TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));

// Wraps _mm_or_ps so the check lines below can match its lowering:
// an `or` on <4 x i32> is expected.
__m128 test_mm_or_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_or_ps
// CHECK: or <4 x i32>
return _mm_or_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));

void test_mm_prefetch(char const* p) {
// CHECK-LABEL: test_mm_prefetch
Expand Down Expand Up @@ -628,6 +652,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_set_ps(A, B, C, D);
}
// Constant-evaluation check for _mm_set_ps: the arguments are given from the
// highest element down, so element 0 of the result is the last argument.
TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +0.0f));

__m128 test_mm_set_ps1(float A) {
// CHECK-LABEL: test_mm_set_ps1
Expand All @@ -637,6 +662,7 @@ __m128 test_mm_set_ps1(float A) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_set_ps1(A);
}
TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
// CHECK-LABEL: test_MM_SET_ROUNDING_MODE
Expand All @@ -657,6 +683,7 @@ __m128 test_mm_set_ss(float A) {
// CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
return _mm_set_ss(A);
}
TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));

__m128 test_mm_set1_ps(float A) {
// CHECK-LABEL: test_mm_set1_ps
Expand All @@ -666,6 +693,7 @@ __m128 test_mm_set1_ps(float A) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_set1_ps(A);
}
TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));

void test_mm_setcsr(unsigned int A) {
// CHECK-LABEL: test_mm_setcsr
Expand All @@ -682,12 +710,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) {
// CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
return _mm_setr_ps(A, B, C, D);
}
TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));

// Wraps _mm_setzero_ps so the check lines below can match its lowering:
// a store of a zeroinitializer <4 x float>.
__m128 test_mm_setzero_ps(void) {
// CHECK-LABEL: test_mm_setzero_ps
// CHECK: store <4 x float> zeroinitializer
return _mm_setzero_ps();
}
TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));

void test_mm_sfence(void) {
// CHECK-LABEL: test_mm_sfence
Expand Down Expand Up @@ -787,6 +817,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) {
// CHECK: fsub <4 x float>
return _mm_sub_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_sub_ss
Expand All @@ -796,6 +827,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) {
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_sub_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
// CHECK-LABEL: test_MM_TRANSPOSE4_PS
Expand Down Expand Up @@ -857,107 +889,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
return _mm_unpackhi_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));

// Wraps _mm_unpacklo_ps so the check line below can match its lowering:
// a shufflevector of two <4 x float> values with mask <0, 4, 1, 5>.
__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_unpacklo_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
return _mm_unpacklo_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));

// Wraps _mm_xor_ps so the check lines below can match its lowering:
// an `xor` on <4 x i32> is expected.
__m128 test_mm_xor_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_xor_ps
// CHECK: xor <4 x i32>
return _mm_xor_ps(A, B);
}

// Test constexpr handling.
#if defined(__cplusplus) && (__cplusplus >= 201103L)

// Compile-time evaluation checks for the SSE intrinsics. Each intrinsic is
// evaluated into a constexpr __m128 and every element of the result is compared
// against its expected value with static_assert, so a wrong constant-folded
// result fails the build instead of a runtime check.
void test_constexpr() {
  // Shared operands for the binary intrinsics below.
  constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f};
  constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f};
  constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f};
  constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f};

  // Construction.
  constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f);
  static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f);

  constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f);
  static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f);

  constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f);
  static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f);

  constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f);
  static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f);

  constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f);
  static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f);

  constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps();
  static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f);

  // Arithmetic: the *_ss forms change element 0 only, the *_ps forms all four.
  constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2);
  static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f);

  constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2);
  static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f);

  constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2);
  static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f);

  constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, k2);
  static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f);

  constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2);
  static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f);

  constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2);
  static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f);

  constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2);
  static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f);

  constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2);
  static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f);

  // Bitwise logic against signed zeros (k4); == treats +0.0f and -0.0f as equal.
  constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4);
  static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f);

  constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4);
  static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f);

  constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4);
  static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f);

  constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4);
  static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f);

  // Element shuffles and moves.
  constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2);
  static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f);

  constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2);
  static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f);

  constexpr __m128 v_mm_move_ss = _mm_move_ss(k1, k2);
  static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f);

  constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2);
  static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f);

  constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2);
  static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f);

  // Integer-to-float conversions into element 0.
  constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42);
  static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f);

  constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99);
  static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f);

#ifdef __x86_64__
  // The codegen test for _mm_cvtsi64_ss in this file is compiled only under
  // __x86_64__, so the constant-evaluation check is guarded the same way.
  constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555);
  static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f);
#endif

  // Scalar extraction of element 0.
  static_assert(_mm_cvtss_f32(k2) == +8.0f);
}

#endif
TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));
Loading
Loading