@@ -64,6 +64,11 @@ struct avx2_half_vector<int32_t> {
6464 {
6565 return _mm_set1_epi32 (type_max ());
6666 } // TODO: this should broadcast bits as is?
67+ static opmask_t knot_opmask (opmask_t x)
68+ {
69+ auto allOnes = seti (-1 , -1 , -1 , -1 );
70+ return _mm_xor_si128 (x, allOnes);
71+ }
6772 static opmask_t get_partial_loadmask (uint64_t num_to_read)
6873 {
6974 auto mask = ((0x1ull << num_to_read) - 0x1ull );
@@ -186,6 +191,10 @@ struct avx2_half_vector<int32_t> {
186191 {
187192 return v;
188193 }
194+ static bool all_false (opmask_t k)
195+ {
196+ return _mm_movemask_ps (_mm_castsi128_ps (k)) == 0 ;
197+ }
189198 static int double_compressstore (type_t *left_addr,
190199 type_t *right_addr,
191200 opmask_t k,
@@ -218,6 +227,11 @@ struct avx2_half_vector<uint32_t> {
218227 {
219228 return _mm_set1_epi32 (type_max ());
220229 }
230+ static opmask_t knot_opmask (opmask_t x)
231+ {
232+ auto allOnes = seti (-1 , -1 , -1 , -1 );
233+ return _mm_xor_si128 (x, allOnes);
234+ }
221235 static opmask_t get_partial_loadmask (uint64_t num_to_read)
222236 {
223237 auto mask = ((0x1ull << num_to_read) - 0x1ull );
@@ -331,6 +345,10 @@ struct avx2_half_vector<uint32_t> {
331345 {
332346 return v;
333347 }
348+ static bool all_false (opmask_t k)
349+ {
350+ return _mm_movemask_ps (_mm_castsi128_ps (k)) == 0 ;
351+ }
334352 static int double_compressstore (type_t *left_addr,
335353 type_t *right_addr,
336354 opmask_t k,
@@ -363,7 +381,11 @@ struct avx2_half_vector<float> {
363381 {
364382 return _mm_set1_ps (type_max ());
365383 }
366-
384+ static opmask_t knot_opmask (opmask_t x)
385+ {
386+ auto allOnes = seti (-1 , -1 , -1 , -1 );
387+ return _mm_xor_si128 (x, allOnes);
388+ }
367389 static regi_t seti (int v1, int v2, int v3, int v4)
368390 {
369391 return _mm_set_epi32 (v1, v2, v3, v4);
@@ -492,6 +514,10 @@ struct avx2_half_vector<float> {
492514 {
493515 return _mm_castps_si128 (v);
494516 }
517+ static bool all_false (opmask_t k)
518+ {
519+ return _mm_movemask_ps (_mm_castsi128_ps (k)) == 0 ;
520+ }
495521 static int double_compressstore (type_t *left_addr,
496522 type_t *right_addr,
497523 opmask_t k,
0 commit comments