@@ -64,6 +64,11 @@ struct avx2_half_vector<int32_t> {
64
64
{
65
65
return _mm_set1_epi32 (type_max ());
66
66
} // TODO: this should broadcast bits as is?
67
+ static opmask_t knot_opmask (opmask_t x)
68
+ {
69
+ auto allOnes = seti (-1 , -1 , -1 , -1 );
70
+ return _mm_xor_si128 (x, allOnes);
71
+ }
67
72
static opmask_t get_partial_loadmask (uint64_t num_to_read)
68
73
{
69
74
auto mask = ((0x1ull << num_to_read) - 0x1ull );
@@ -186,6 +191,10 @@ struct avx2_half_vector<int32_t> {
186
191
{
187
192
return v;
188
193
}
194
+ static bool all_false (opmask_t k)
195
+ {
196
+ return _mm_movemask_ps (_mm_castsi128_ps (k)) == 0 ;
197
+ }
189
198
static int double_compressstore (type_t *left_addr,
190
199
type_t *right_addr,
191
200
opmask_t k,
@@ -218,6 +227,11 @@ struct avx2_half_vector<uint32_t> {
218
227
{
219
228
return _mm_set1_epi32 (type_max ());
220
229
}
230
+ static opmask_t knot_opmask (opmask_t x)
231
+ {
232
+ auto allOnes = seti (-1 , -1 , -1 , -1 );
233
+ return _mm_xor_si128 (x, allOnes);
234
+ }
221
235
static opmask_t get_partial_loadmask (uint64_t num_to_read)
222
236
{
223
237
auto mask = ((0x1ull << num_to_read) - 0x1ull );
@@ -331,6 +345,10 @@ struct avx2_half_vector<uint32_t> {
331
345
{
332
346
return v;
333
347
}
348
+ static bool all_false (opmask_t k)
349
+ {
350
+ return _mm_movemask_ps (_mm_castsi128_ps (k)) == 0 ;
351
+ }
334
352
static int double_compressstore (type_t *left_addr,
335
353
type_t *right_addr,
336
354
opmask_t k,
@@ -363,7 +381,11 @@ struct avx2_half_vector<float> {
363
381
{
364
382
return _mm_set1_ps (type_max ());
365
383
}
366
-
384
+ static opmask_t knot_opmask (opmask_t x)
385
+ {
386
+ auto allOnes = seti (-1 , -1 , -1 , -1 );
387
+ return _mm_xor_si128 (x, allOnes);
388
+ }
367
389
static regi_t seti (int v1, int v2, int v3, int v4)
368
390
{
369
391
return _mm_set_epi32 (v1, v2, v3, v4);
@@ -492,6 +514,10 @@ struct avx2_half_vector<float> {
492
514
{
493
515
return _mm_castps_si128 (v);
494
516
}
517
+ static bool all_false (opmask_t k)
518
+ {
519
+ return _mm_movemask_ps (_mm_castsi128_ps (k)) == 0 ;
520
+ }
495
521
static int double_compressstore (type_t *left_addr,
496
522
type_t *right_addr,
497
523
opmask_t k,
0 commit comments