@@ -17,75 +17,24 @@ TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xFFFF
 TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xFF, 21),
                           21,21,21,21,21,21,21,21));
 
-{
-  __m128 a = (__m128)(__v4sf){1,2,3,4};
-  TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, a),
-                            1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
-}
-{
-  __m128 a = (__m128)(__v4sf){1,2,3,4};
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, a),
-                            1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
-}
-{
-  __m128d a = (__m128d)(__v2df){1,2};
-  TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, a),
-                             1,2,1,2,1,2,1,2));
-}
-{
-  __m128d a = (__m128d)(__v2df){1,2};
-  TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, a),
-                             1,2,1,2,1,2,1,2));
-}
+TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xFFFF, (__m128)(__v4sf){1,2,3,4}),
+                          1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
 
-// additionally add i32x4/i32x8 z-forms
-{
-  __m128i a = (__m128i)(__v4si){0,1,2,3};
-  TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, a),
-                             0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
-}
-{
-  __m256i a = _mm256_set1_epi32(9);
-  TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, a),
-                             9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
-}
+TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, (__m128)(__v4sf){1,2,3,4}),
+                          1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));
 
-// unpack and moves (512)
-{
-  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
-  __m512 b = _mm512_set1_ps(0);
-  TEST_CONSTEXPR(match_m512(_mm512_mask_unpackhi_ps(_mm512_setzero_ps(), 0xFFFF, a, b),
-                            15,0,13,0,11,0,9,0,7,0,5,0,3,0,1,0));
-}
-{
-  __m512 a = _mm512_set1_ps(1);
-  __m512 b = _mm512_set1_ps(2);
-  TEST_CONSTEXPR(match_m512(_mm512_mask_unpacklo_ps(_mm512_setzero_ps(), 0xFFFF, a, b),
-                            1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2));
-}
+TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xFF, (__m128d)(__v2df){1,2}),
+                           1,2,1,2,1,2,1,2));
 
-// move_ss/move_sd in constexpr masked form
-{
-  __m128 a = (__m128)(__v4sf){10,2,3,4};
-  __m128 b = (__m128)(__v4sf){20,6,7,8};
-  TEST_CONSTEXPR(match_m128(_mm_mask_move_ss(_mm_setzero_ps(), 0x1, a, b), 20,0,0,0));
-}
-{
-  __m128d a = (__m128d)(__v2df){10,2};
-  __m128d b = (__m128d)(__v2df){20,6};
-  TEST_CONSTEXPR(match_m128d(_mm_mask_move_sd(_mm_setzero_pd(), 0x1, a, b), 20,0));
-}
-// z-forms for movehdup/moveldup
-{
-  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_movehdup_ps(0xFFFF, a),
-                            15,15,13,13,11,11,9,9,7,7,5,5,3,3,1,1));
-}
-{
-  __m512 a = _mm512_set_ps(16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
-  TEST_CONSTEXPR(match_m512(_mm512_maskz_moveldup_ps(0xFFFF, a),
-                            16,16,14,14,12,12,10,10,8,8,6,6,4,4,2,2));
-}
+TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xFF, (__m128d)(__v2df){1,2}),
+                           1,2,1,2,1,2,1,2));
+
+// additionally add i32x4/i32x8 z-forms
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xFFFF, (__m128i)(__v4si){0,1,2,3}),
+                           0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3));
+
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xFFFF, _mm256_set1_epi32(9)),
+                           9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9));
 
 __m512d test_mm512_sqrt_pd(__m512d a)
 {
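For context, below is a minimal sketch of the kind of helpers these assertions rely on. TEST_CONSTEXPR and match_m512 are the names used in the test file above, but the definitions shown here are assumptions rather than the verbatim definitions from clang's shared test header, and they presume a clang recent enough to evaluate these AVX-512 builtins in constant expressions. The sketch also illustrates why the vector literals are inlined in the new form: static_assert requires the entire call, operands included, to be a constant expression, which a block-local non-constexpr variable would break.

// Sketch only: assumed shapes for TEST_CONSTEXPR and match_m512, not the
// actual definitions from clang's builtin test helpers.
#include <immintrin.h>

#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__, "")

// Element-wise comparison of a 16 x float vector against expected lane values.
constexpr bool match_m512(__m512 v, float x0, float x1, float x2, float x3,
                          float x4, float x5, float x6, float x7,
                          float x8, float x9, float x10, float x11,
                          float x12, float x13, float x14, float x15) {
  return v[0] == x0 && v[1] == x1 && v[2] == x2 && v[3] == x3 &&
         v[4] == x4 && v[5] == x5 && v[6] == x6 && v[7] == x7 &&
         v[8] == x8 && v[9] == x9 && v[10] == x10 && v[11] == x11 &&
         v[12] == x12 && v[13] == x13 && v[14] == x14 && v[15] == x15;
}

// The inlined form used by the diff: every operand is a literal, so the whole
// expression folds at compile time once the broadcast intrinsic is
// constexpr-evaluable.
TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xFFFF, (__m128)(__v4sf){1,2,3,4}),
                          1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4));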