Skip to content

Commit abf7629

Browse files
committed
Add missing unpack getter and dyn dispatch
1 parent 69c31bb commit abf7629

File tree

4 files changed

+64
-1
lines changed

4 files changed

+64
-1
lines changed

cpp/src/arrow/util/bpacking.cc

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,30 @@ template int unpack<uint16_t>(const uint8_t*, uint16_t*, int, int);
6868
template int unpack<uint32_t>(const uint8_t*, uint32_t*, int, int);
6969
template int unpack<uint64_t>(const uint8_t*, uint64_t*, int, int);
7070

71+
namespace {
72+
73+
// Lists the candidate `get_unpack_fn_*<Uint>` getters, each paired with the
// DispatchLevel it is registered under. Used as the template argument of
// DynamicDispatch in get_unpack_fn() below.
//
// The scalar getter is always listed; the AVX2 and AVX512 getters are listed
// only when the matching ARROW_HAVE_RUNTIME_* macro is defined.
template <typename Uint>
74+
struct GetUnpackDynamicFunction {
75+
using FunctionType = decltype(&get_unpack_fn_scalar<Uint>);
76+
using Implementation = std::pair<DispatchLevel, FunctionType>;
77+
78+
static auto implementations() {
79+
return std::array {
80+
// Current SIMD unpack algorithm works terribly on SSE4.2 due to lack of variable
81+
// rshift and poor xsimd fallback, so no SSE4.2 entry is registered here.
82+
Implementation{DispatchLevel::NONE, &get_unpack_fn_scalar<Uint>},
83+
#if defined(ARROW_HAVE_RUNTIME_AVX2)
84+
Implementation{DispatchLevel::AVX2, &get_unpack_fn_avx2<Uint>},
85+
#endif
86+
#if defined(ARROW_HAVE_RUNTIME_AVX512)
87+
Implementation{DispatchLevel::AVX512, &get_unpack_fn_avx512<Uint>},
88+
#endif
89+
};
90+
}
91+
};
92+
93+
} // namespace
94+
7195
template <typename Uint>
7296
UnpackFn<Uint> get_unpack_fn(int num_bits) {
7397
if constexpr (std::is_same_v<Uint, uint16_t>) {
@@ -77,7 +101,8 @@ UnpackFn<Uint> get_unpack_fn(int num_bits) {
77101
#if defined(ARROW_HAVE_NEON)
78102
return get_unpack_fn_neon<Uint>(num_bits);
79103
#else
80-
// TODO
104+
static DynamicDispatch<GetUnpackDynamicFunction<Uint> > dispatch;
105+
return dispatch.func(num_bits);
81106
#endif
82107
}
83108
}

cpp/src/arrow/util/bpacking_simd_avx2.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,13 @@ template int unpack_avx2<uint16_t>(const uint8_t*, uint16_t*, int, int);
3030
template int unpack_avx2<uint32_t>(const uint8_t*, uint32_t*, int, int);
3131
template int unpack_avx2<uint64_t>(const uint8_t*, uint64_t*, int, int);
3232

33+
// AVX2 variant of the unpack-function getter: forwards `num_bits` to the
// generic get_unpack_fn<> template, parameterized with Simd256UnpackerForWidth.
// NOTE(review): presumably `num_bits` is the packed bit width of each value;
// confirm against the generic getter, which is not visible here.
template <typename Uint>
34+
UnpackFn<Uint> get_unpack_fn_avx2(int num_bits) {
35+
return get_unpack_fn<Simd256UnpackerForWidth, Uint>(num_bits);
36+
}
37+
38+
template UnpackFn<uint16_t> get_unpack_fn_avx2<uint16_t>(int);
39+
template UnpackFn<uint32_t> get_unpack_fn_avx2<uint32_t>(int);
40+
template UnpackFn<uint64_t> get_unpack_fn_avx2<uint64_t>(int);
41+
3342
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_simd_avx512.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,12 @@ int unpack_avx512(const uint8_t* in, Uint* out, int batch_size, int num_bits) {
2929
template int unpack_avx512<uint32_t>(const uint8_t*, uint32_t*, int, int);
3030
template int unpack_avx512<uint64_t>(const uint8_t*, uint64_t*, int, int);
3131

32+
// AVX512 variant of the unpack-function getter: forwards `num_bits` to the
// generic get_unpack_fn<> template, parameterized with Simd512UnpackerForWidth.
// Only the uint32_t and uint64_t instantiations are emitted below.
template <typename Uint>
33+
UnpackFn<Uint> get_unpack_fn_avx512(int num_bits) {
34+
return get_unpack_fn<Simd512UnpackerForWidth, Uint>(num_bits);
35+
}
36+
37+
template UnpackFn<uint32_t> get_unpack_fn_avx512<uint32_t>(int);
38+
template UnpackFn<uint64_t> get_unpack_fn_avx512<uint64_t>(int);
39+
3240
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_simd_internal.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,18 @@ extern template ARROW_TEMPLATE_EXPORT int unpack_avx2<uint32_t>(const uint8_t*,
8282
extern template ARROW_TEMPLATE_EXPORT int unpack_avx2<uint64_t>(const uint8_t*, uint64_t*,
8383
int, int);
8484

85+
template <typename Uint>
86+
ARROW_EXPORT UnpackFn<Uint> get_unpack_fn_avx2(int num_bits);
87+
88+
extern template ARROW_TEMPLATE_EXPORT UnpackFn<uint16_t> get_unpack_fn_avx2<uint16_t>(
89+
int);
90+
91+
extern template ARROW_TEMPLATE_EXPORT UnpackFn<uint32_t> get_unpack_fn_avx2<uint32_t>(
92+
int);
93+
94+
extern template ARROW_TEMPLATE_EXPORT UnpackFn<uint64_t> get_unpack_fn_avx2<uint64_t>(
95+
int);
96+
8597
#endif
8698

8799
#if defined(ARROW_HAVE_AVX512) || defined(ARROW_HAVE_RUNTIME_AVX512)
@@ -96,6 +108,15 @@ extern template ARROW_TEMPLATE_EXPORT int unpack_avx512<uint32_t>(const uint8_t*
96108
extern template ARROW_TEMPLATE_EXPORT int unpack_avx512<uint64_t>(const uint8_t*,
97109
uint64_t*, int, int);
98110

111+
template <typename Uint>
112+
ARROW_EXPORT UnpackFn<Uint> get_unpack_fn_avx512(int num_bits);
113+
114+
extern template ARROW_TEMPLATE_EXPORT UnpackFn<uint32_t> get_unpack_fn_avx512<uint32_t>(
115+
int);
116+
117+
extern template ARROW_TEMPLATE_EXPORT UnpackFn<uint64_t> get_unpack_fn_avx512<uint64_t>(
118+
int);
119+
99120
#endif
100121

101122
} // namespace arrow::internal

0 commit comments

Comments
 (0)