Skip to content

Commit 55b6077

Browse files
committed
Fixed problems on 32-bit systems
1 parent 7a83e52 commit 55b6077

File tree

3 files changed

+61
-2
lines changed

3 files changed

+61
-2
lines changed

src/avx2-32bit-half.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,13 @@ struct avx2_half_vector<int32_t> {
100100
return _mm256_mask_i64gather_epi32(
101101
src, (const int *)base, index, mask, scale);
102102
}
103+
template <int scale>
104+
static reg_t
105+
mask_i64gather(reg_t src, opmask_t mask, __m128i index, void const *base)
106+
{
107+
return _mm_mask_i32gather_epi32(
108+
src, (const int *)base, index, mask, scale);
109+
}
103110
static reg_t i64gather(type_t *arr, arrsize_t *ind)
104111
{
105112
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
@@ -237,6 +244,13 @@ struct avx2_half_vector<uint32_t> {
237244
return _mm256_mask_i64gather_epi32(
238245
src, (const int *)base, index, mask, scale);
239246
}
247+
template <int scale>
248+
static reg_t
249+
mask_i64gather(reg_t src, opmask_t mask, __m128i index, void const *base)
250+
{
251+
return _mm_mask_i32gather_epi32(
252+
src, (const int *)base, index, mask, scale);
253+
}
240254
static reg_t i64gather(type_t *arr, arrsize_t *ind)
241255
{
242256
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
@@ -406,6 +420,13 @@ struct avx2_half_vector<float> {
406420
return _mm256_mask_i64gather_ps(
407421
src, (const float *)base, index, _mm_castsi128_ps(mask), scale);
408422
}
423+
template <int scale>
424+
static reg_t
425+
mask_i64gather(reg_t src, opmask_t mask, __m128i index, void const *base)
426+
{
427+
return _mm_mask_i32gather_ps(
428+
src, (const float *)base, index, _mm_castsi128_ps(mask), scale);
429+
}
409430
static reg_t i64gather(type_t *arr, arrsize_t *ind)
410431
{
411432
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);

src/avx2-64bit-qsort.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ struct avx2_vector<int64_t> {
106106
return _mm256_mask_i64gather_epi64(
107107
src, (const long long int *)base, index, mask, scale);
108108
}
109+
template <int scale>
110+
static reg_t
111+
mask_i64gather(reg_t src, opmask_t mask, __m128i index, void const *base)
112+
{
113+
return _mm256_mask_i32gather_epi64(
114+
src, (const long long int *)base, index, mask, scale);
115+
}
109116
static reg_t i64gather(type_t *arr, arrsize_t *ind)
110117
{
111118
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
@@ -252,6 +259,13 @@ struct avx2_vector<uint64_t> {
252259
return _mm256_mask_i64gather_epi64(
253260
src, (const long long int *)base, index, mask, scale);
254261
}
262+
template <int scale>
263+
static reg_t
264+
mask_i64gather(reg_t src, opmask_t mask, __m128i index, void const *base)
265+
{
266+
return _mm256_mask_i32gather_epi64(
267+
src, (const long long int *)base, index, mask, scale);
268+
}
255269
static reg_t i64gather(type_t *arr, arrsize_t *ind)
256270
{
257271
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);
@@ -444,6 +458,17 @@ struct avx2_vector<double> {
444458
scale);
445459
;
446460
}
461+
template <int scale>
462+
static reg_t
463+
mask_i64gather(reg_t src, opmask_t mask, __m128i index, void const *base)
464+
{
465+
return _mm256_mask_i32gather_pd(src,
466+
(const type_t *)base,
467+
index,
468+
_mm256_castsi256_pd(mask),
469+
scale);
470+
;
471+
}
447472
static reg_t i64gather(type_t *arr, arrsize_t *ind)
448473
{
449474
return set(arr[ind[3]], arr[ind[2]], arr[ind[1]], arr[ind[0]]);

src/xss-network-keyvaluesort.hpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,24 @@ struct index_64bit_vector_type<4> {
2222
template <typename keyType, typename valueType>
2323
typename valueType::opmask_t extend_mask(typename keyType::opmask_t mask)
2424
{
25+
using inT = typename keyType::opmask_t;
26+
using outT = typename valueType::opmask_t;
27+
2528
if constexpr (keyType::vec_type == simd_type::AVX512) { return mask; }
2629
else if constexpr (keyType::vec_type == simd_type::AVX2) {
27-
if constexpr (sizeof(mask) == 32) { return mask; }
28-
else {
30+
if constexpr (sizeof(inT) == sizeof(outT)) { return mask; }
31+
else if constexpr (sizeof(inT) == 32 && sizeof(outT) == 16){
32+
// We need to convert a mask made of 64-bit lanes into a mask made of 32-bit lanes.
33+
// This works by taking advantage of the fact that the only bit that matters
34+
// is the topmost bit of each lane, which is the sign bit once the lane is reinterpreted as floating point (the double-to-float conversion keeps the sign)
35+
36+
// TODO: figure out whether there is a better way to do this
37+
return _mm_castps_si128(_mm256_cvtpd_ps(_mm256_castsi256_pd(mask)));
38+
}
39+
else if constexpr (sizeof(inT) == 16 && sizeof(outT) == 32){
2940
return _mm256_cvtepi32_epi64(mask);
41+
}else{
42+
static_assert(sizeof(inT) == -1, "should not reach here");
3043
}
3144
}
3245
else {

0 commit comments

Comments
 (0)