77#include <immintrin.h>
88#include "builtin_test_helpers.h"
99
10- // Constexpr coverage for the VLDQ broadcast intrinsics (i32x2/f32x2/f64x2/i64x2) that have no test functions in this file.
11- // The corresponding test_mm*_*_broadcast_* functions live in avx512vldq-builtins.c.
12- TEST_CONSTEXPR (match_v4si (_mm_mask_broadcast_i32x2 (_mm_setzero_si128 (), 0xF , (__m128i )(__v4si ){0 ,1 ,2 ,3 }), 0 ,1 ,0 ,1 ));
13-
14- TEST_CONSTEXPR (match_m256 (_mm256_mask_broadcast_f32x2 (_mm256_setzero_ps (), 0xFF , (__m128 )(__v4sf ){1.f ,2.f ,3.f ,4.f }), 1.f ,2.f ,1.f ,2.f ,1.f ,2.f ,1.f ,2.f ));
15-
16- TEST_CONSTEXPR (match_m256 (_mm256_maskz_broadcast_f32x2 (0xFF , (__m128 )(__v4sf ){1.f ,2.f ,3.f ,4.f }), 1.f ,2.f ,1.f ,2.f ,1.f ,2.f ,1.f ,2.f ));
17-
18- TEST_CONSTEXPR (match_m256d (_mm256_mask_broadcast_f64x2 (_mm256_setzero_pd (), 0xFF , (__m128d )(__v2df ){1.0 ,2.0 }), 1.0 ,2.0 ,1.0 ,2.0 ));
19-
20- TEST_CONSTEXPR (match_m256d (_mm256_maskz_broadcast_f64x2 (0xFF , (__m128d )(__v2df ){1.0 ,2.0 }), 1.0 ,2.0 ,1.0 ,2.0 ));
21-
22- TEST_CONSTEXPR (match_v4di (_mm256_maskz_broadcast_i64x2 (0xF , (__m128i )(__v2di ){1 ,2 }), 1 ,2 ,1 ,2 ));
23-
24-
25- // i32x2 maskz (128/256)
26-
27- TEST_CONSTEXPR (match_v4si (_mm_maskz_broadcast_i32x2 (0xF , (__m128i )(__v4si ){0 ,1 ,2 ,3 }), 0 ,1 ,0 ,1 ));
28-
29-
30-
31- TEST_CONSTEXPR (match_v8si (_mm256_maskz_broadcast_i32x2 (0xFF , (__m128i )(__v4si ){0 ,1 ,2 ,3 }), 0 ,1 ,0 ,1 ,0 ,1 ,0 ,1 ));
32-
33-
34- // unpackhi/lo with a full mask behave like the underlying unpack
35-
36- TEST_CONSTEXPR (match_m128d (_mm_mask_unpackhi_pd (_mm_setzero_pd (), 0x3 , (__m128d )(__v2df ){1.0 ,2.0 }, (__m128d )(__v2df ){3.0 ,4.0 }), 2.0 ,4.0 ));
37- TEST_CONSTEXPR (match_m128d (_mm_mask_unpacklo_pd (_mm_setzero_pd (), 0x3 , (__m128d )(__v2df ){1.0 ,2.0 }, (__m128d )(__v2df ){3.0 ,4.0 }), 1.0 ,3.0 ));
38-
39-
40- TEST_CONSTEXPR (match_m256d (_mm256_mask_unpackhi_pd (_mm256_setzero_pd (), 0xFF , (__m256d )(__v4df ){1.0 ,2.0 ,3.0 ,4.0 }, (__m256d )(__v4df ){5.0 ,6.0 ,7.0 ,8.0 }), 2.0 ,6.0 ,4.0 ,8.0 ));
41- TEST_CONSTEXPR (match_m256d (_mm256_mask_unpacklo_pd (_mm256_setzero_pd (), 0xFF , (__m256d )(__v4df ){1.0 ,2.0 ,3.0 ,4.0 }, (__m256d )(__v4df ){5.0 ,6.0 ,7.0 ,8.0 }), 1.0 ,5.0 ,3.0 ,7.0 ));
42-
43-
44- // movehdup/moveldup with a full mask equal the underlying *_move*dup
45-
46- TEST_CONSTEXPR (match_m128 (_mm_mask_movehdup_ps (_mm_setzero_ps (), 0xF , (__m128 )(__v4sf ){1.f ,2.f ,3.f ,4.f }), 2.f ,2.f ,4.f ,4.f ));
47- TEST_CONSTEXPR (match_m128 (_mm_mask_moveldup_ps (_mm_setzero_ps (), 0xF , (__m128 )(__v4sf ){1.f ,2.f ,3.f ,4.f }), 1.f ,1.f ,3.f ,3.f ));
48-
49-
50- TEST_CONSTEXPR (match_m256 (_mm256_mask_movehdup_ps (_mm256_setzero_ps (), 0xFF , (__m256 )(__v8sf ){1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 }), 2 ,2 ,4 ,4 ,6 ,6 ,8 ,8 ));
51- TEST_CONSTEXPR (match_m256 (_mm256_mask_moveldup_ps (_mm256_setzero_ps (), 0xFF , (__m256 )(__v8sf ){1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 }), 1 ,1 ,3 ,3 ,5 ,5 ,7 ,7 ));
52-
53-
5410__mmask8 test_mm_cmpeq_epu32_mask (__m128i __a , __m128i __b ) {
5511 // CHECK-LABEL: test_mm_cmpeq_epu32_mask
5612 // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
@@ -7675,6 +7631,8 @@ __m128d test_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d
76757631 return _mm_mask_unpackhi_pd (__W , __U , __A , __B );
76767632}
76777633
7634+ TEST_CONSTEXPR (match_m128d (_mm_mask_unpackhi_pd (_mm_setzero_pd (), 0x3 , (__m128d )(__v2df ){1.0 ,2.0 }, (__m128d )(__v2df ){3.0 ,4.0 }), 2.0 ,4.0 ));
7635+
76787636__m128d test_mm_maskz_unpackhi_pd (__mmask8 __U , __m128d __A , __m128d __B ) {
76797637 // CHECK-LABEL: test_mm_maskz_unpackhi_pd
76807638 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
@@ -7689,6 +7647,8 @@ __m256d test_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
76897647 return _mm256_mask_unpackhi_pd (__W , __U , __A , __B );
76907648}
76917649
7650+ TEST_CONSTEXPR (match_m256d (_mm256_mask_unpackhi_pd (_mm256_setzero_pd (), 0xFF , (__m256d )(__v4df ){1.0 ,2.0 ,3.0 ,4.0 }, (__m256d )(__v4df ){5.0 ,6.0 ,7.0 ,8.0 }), 2.0 ,6.0 ,4.0 ,8.0 ));
7651+
76927652__m256d test_mm256_maskz_unpackhi_pd (__mmask8 __U , __m256d __A , __m256d __B ) {
76937653 // CHECK-LABEL: test_mm256_maskz_unpackhi_pd
76947654 // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -7731,6 +7691,8 @@ __m128d test_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d
77317691 return _mm_mask_unpacklo_pd (__W , __U , __A , __B );
77327692}
77337693
7694+ TEST_CONSTEXPR (match_m128d (_mm_mask_unpacklo_pd (_mm_setzero_pd (), 0x3 , (__m128d )(__v2df ){1.0 ,2.0 }, (__m128d )(__v2df ){3.0 ,4.0 }), 1.0 ,3.0 ));
7695+
77347696__m128d test_mm_maskz_unpacklo_pd (__mmask8 __U , __m128d __A , __m128d __B ) {
77357697 // CHECK-LABEL: test_mm_maskz_unpacklo_pd
77367698 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
@@ -7745,6 +7707,8 @@ __m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
77457707 return _mm256_mask_unpacklo_pd (__W , __U , __A , __B );
77467708}
77477709
7710+ TEST_CONSTEXPR (match_m256d (_mm256_mask_unpacklo_pd (_mm256_setzero_pd (), 0xFF , (__m256d )(__v4df ){1.0 ,2.0 ,3.0 ,4.0 }, (__m256d )(__v4df ){5.0 ,6.0 ,7.0 ,8.0 }), 1.0 ,5.0 ,3.0 ,7.0 ));
7711+
77487712__m256d test_mm256_maskz_unpacklo_pd (__mmask8 __U , __m256d __A , __m256d __B ) {
77497713 // CHECK-LABEL: test_mm256_maskz_unpacklo_pd
77507714 // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -10380,6 +10344,8 @@ __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
1038010344 return _mm_mask_movehdup_ps (__W , __U , __A );
1038110345}
1038210346
10347+ TEST_CONSTEXPR (match_m128 (_mm_mask_movehdup_ps (_mm_setzero_ps (), 0xF , (__m128 )(__v4sf ){1.f ,2.f ,3.f ,4.f }), 2.f ,2.f ,4.f ,4.f ));
10348+
1038310349__m128 test_mm_maskz_movehdup_ps (__mmask8 __U , __m128 __A ) {
1038410350 // CHECK-LABEL: test_mm_maskz_movehdup_ps
1038510351 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@@ -10394,6 +10360,8 @@ __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
1039410360 return _mm256_mask_movehdup_ps (__W , __U , __A );
1039510361}
1039610362
10363+ TEST_CONSTEXPR (match_m256 (_mm256_mask_movehdup_ps (_mm256_setzero_ps (), 0xFF , (__m256 )(__v8sf ){1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 }), 2 ,2 ,4 ,4 ,6 ,6 ,8 ,8 ));
10364+
1039710365__m256 test_mm256_maskz_movehdup_ps (__mmask8 __U , __m256 __A ) {
1039810366 // CHECK-LABEL: test_mm256_maskz_movehdup_ps
1039910367 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@@ -10408,6 +10376,8 @@ __m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
1040810376 return _mm_mask_moveldup_ps (__W , __U , __A );
1040910377}
1041010378
10379+ TEST_CONSTEXPR (match_m128 (_mm_mask_moveldup_ps (_mm_setzero_ps (), 0xF , (__m128 )(__v4sf ){1.f ,2.f ,3.f ,4.f }), 1.f ,1.f ,3.f ,3.f ));
10380+
1041110381__m128 test_mm_maskz_moveldup_ps (__mmask8 __U , __m128 __A ) {
1041210382 // CHECK-LABEL: test_mm_maskz_moveldup_ps
1041310383 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@@ -10422,6 +10392,8 @@ __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
1042210392 return _mm256_mask_moveldup_ps (__W , __U , __A );
1042310393}
1042410394
10395+ TEST_CONSTEXPR (match_m256 (_mm256_mask_moveldup_ps (_mm256_setzero_ps (), 0xFF , (__m256 )(__v8sf ){1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 }), 1 ,1 ,3 ,3 ,5 ,5 ,7 ,7 ));
10396+
1042510397__m256 test_mm256_maskz_moveldup_ps (__mmask8 __U , __m256 __A ) {
1042610398 // CHECK-LABEL: test_mm256_maskz_moveldup_ps
1042710399 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
0 commit comments