Skip to content

Commit f49088f

Browse files
committed
Adds missing functions to AVX2 half vectors
1 parent 1b0a374 commit f49088f

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

src/avx2-32bit-half.hpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ struct avx2_half_vector<int32_t> {
6464
{
6565
return _mm_set1_epi32(type_max());
6666
} // TODO: this should broadcast bits as is?
67+
// Logical NOT of a vector mask: XORs x with the value returned by
// seti(-1, -1, -1, -1) (presumably a register with every bit set --
// confirm against seti's definition), which inverts each bit of x.
static opmask_t knot_opmask(opmask_t x)
{
    return _mm_xor_si128(x, seti(-1, -1, -1, -1));
}
6772
static opmask_t get_partial_loadmask(uint64_t num_to_read)
6873
{
6974
auto mask = ((0x1ull << num_to_read) - 0x1ull);
@@ -186,6 +191,10 @@ struct avx2_half_vector<int32_t> {
186191
{
187192
return v;
188193
}
194+
// Returns true when no lane of the mask is set. _mm_movemask_ps collects
// the most significant bit of each 32-bit lane, so only those four bits
// decide the result.
static bool all_false(opmask_t k)
{
    const int lane_sign_bits = _mm_movemask_ps(_mm_castsi128_ps(k));
    return lane_sign_bits == 0;
}
189198
static int double_compressstore(type_t *left_addr,
190199
type_t *right_addr,
191200
opmask_t k,
@@ -218,6 +227,11 @@ struct avx2_half_vector<uint32_t> {
218227
{
219228
return _mm_set1_epi32(type_max());
220229
}
230+
// Logical NOT of a vector mask: XORs x with the value returned by
// seti(-1, -1, -1, -1) (presumably a register with every bit set --
// confirm against seti's definition), which inverts each bit of x.
static opmask_t knot_opmask(opmask_t x)
{
    return _mm_xor_si128(x, seti(-1, -1, -1, -1));
}
221235
static opmask_t get_partial_loadmask(uint64_t num_to_read)
222236
{
223237
auto mask = ((0x1ull << num_to_read) - 0x1ull);
@@ -331,6 +345,10 @@ struct avx2_half_vector<uint32_t> {
331345
{
332346
return v;
333347
}
348+
// Returns true when no lane of the mask is set. _mm_movemask_ps collects
// the most significant bit of each 32-bit lane, so only those four bits
// decide the result.
static bool all_false(opmask_t k)
{
    const int lane_sign_bits = _mm_movemask_ps(_mm_castsi128_ps(k));
    return lane_sign_bits == 0;
}
334352
static int double_compressstore(type_t *left_addr,
335353
type_t *right_addr,
336354
opmask_t k,
@@ -363,7 +381,11 @@ struct avx2_half_vector<float> {
363381
{
364382
return _mm_set1_ps(type_max());
365383
}
366-
384+
// Logical NOT of a vector mask. seti(-1, -1, -1, -1) builds an integer
// register whose four 32-bit lanes are all -1 (every bit set); XORing x
// with it inverts each bit of x.
static opmask_t knot_opmask(opmask_t x)
{
    return _mm_xor_si128(x, seti(-1, -1, -1, -1));
}
367389
static regi_t seti(int v1, int v2, int v3, int v4)
368390
{
369391
return _mm_set_epi32(v1, v2, v3, v4);
@@ -492,6 +514,10 @@ struct avx2_half_vector<float> {
492514
{
493515
return _mm_castps_si128(v);
494516
}
517+
// Returns true when no lane of the mask is set. _mm_movemask_ps collects
// the most significant bit of each 32-bit lane, so only those four bits
// decide the result.
static bool all_false(opmask_t k)
{
    const int lane_sign_bits = _mm_movemask_ps(_mm_castsi128_ps(k));
    return lane_sign_bits == 0;
}
495521
static int double_compressstore(type_t *left_addr,
496522
type_t *right_addr,
497523
opmask_t k,

src/avx512-64bit-common.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ struct ymm_vector<float> {
3434
using opmask_t = __mmask8;
3535
static const uint8_t numlanes = 8;
3636
static constexpr simd_type vec_type = simd_type::AVX512;
37-
using swizzle_ops = avx512_ymm_64bit_swizzle_ops;
3837

3938
using swizzle_ops = avx512_ymm_64bit_swizzle_ops;
4039

@@ -237,7 +236,6 @@ struct ymm_vector<uint32_t> {
237236
using opmask_t = __mmask8;
238237
static const uint8_t numlanes = 8;
239238
static constexpr simd_type vec_type = simd_type::AVX512;
240-
using swizzle_ops = avx512_ymm_64bit_swizzle_ops;
241239

242240
using swizzle_ops = avx512_ymm_64bit_swizzle_ops;
243241

@@ -426,7 +424,6 @@ struct ymm_vector<int32_t> {
426424
using opmask_t = __mmask8;
427425
static const uint8_t numlanes = 8;
428426
static constexpr simd_type vec_type = simd_type::AVX512;
429-
using swizzle_ops = avx512_ymm_64bit_swizzle_ops;
430427

431428
using swizzle_ops = avx512_ymm_64bit_swizzle_ops;
432429

0 commit comments

Comments
 (0)