Skip to content

Commit 70b0b8c

Browse files
committed
Add UnpackOptions and max_read_bytes
1 parent 408ef04 commit 70b0b8c

14 files changed

+479
-312
lines changed

cpp/src/arrow/util/bit_stream_utils_internal.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -273,14 +273,19 @@ inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
273273
batch_size = static_cast<int>(remaining_bits / num_bits);
274274
}
275275

276+
const ::arrow::internal::UnpackOptions opts{
277+
/* .batch_size= */ batch_size,
278+
/* .bit_width= */ num_bits,
279+
/* .bit_offset= */ bit_offset_,
280+
/* .max_read_bytes= */ max_bytes_ - byte_offset_,
281+
};
282+
276283
if constexpr (std::is_same_v<T, bool>) {
277-
::arrow::internal::unpack(buffer_ + byte_offset_, v, batch_size, num_bits,
278-
bit_offset_);
284+
::arrow::internal::unpack(buffer_ + byte_offset_, v, opts);
279285

280286
} else {
281287
::arrow::internal::unpack(buffer_ + byte_offset_,
282-
reinterpret_cast<std::make_unsigned_t<T>*>(v), batch_size,
283-
num_bits, bit_offset_);
288+
reinterpret_cast<std::make_unsigned_t<T>*>(v), opts);
284289
}
285290

286291
Advance(batch_size * num_bits);

cpp/src/arrow/util/bpacking.cc

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
#include <array>
1919

20-
#include "arrow/util/bpacking_dispatch_internal.h"
2120
#include "arrow/util/bpacking_internal.h"
2221
#include "arrow/util/bpacking_scalar_internal.h"
2322
#include "arrow/util/bpacking_simd_internal.h"
@@ -52,19 +51,19 @@ struct UnpackDynamicFunction {
5251
} // namespace
5352

5453
template <typename Uint>
55-
void unpack(const uint8_t* in, Uint* out, int batch_size, int num_bits, int bit_offset) {
54+
void unpack(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
5655
#if defined(ARROW_HAVE_NEON)
57-
return unpack_neon(in, out, batch_size, num_bits, bit_offset);
56+
return unpack_neon(in, out, opts);
5857
#else
5958
static DynamicDispatch<UnpackDynamicFunction<Uint> > dispatch;
60-
return dispatch.func(in, out, batch_size, num_bits, bit_offset);
59+
return dispatch.func(in, out, opts);
6160
#endif
6261
}
6362

64-
template void unpack<bool>(const uint8_t*, bool*, int, int, int);
65-
template void unpack<uint8_t>(const uint8_t*, uint8_t*, int, int, int);
66-
template void unpack<uint16_t>(const uint8_t*, uint16_t*, int, int, int);
67-
template void unpack<uint32_t>(const uint8_t*, uint32_t*, int, int, int);
68-
template void unpack<uint64_t>(const uint8_t*, uint64_t*, int, int, int);
63+
template void unpack<bool>(const uint8_t*, bool*, const UnpackOptions&);
64+
template void unpack<uint8_t>(const uint8_t*, uint8_t*, const UnpackOptions&);
65+
template void unpack<uint16_t>(const uint8_t*, uint16_t*, const UnpackOptions&);
66+
template void unpack<uint32_t>(const uint8_t*, uint32_t*, const UnpackOptions&);
67+
template void unpack<uint64_t>(const uint8_t*, uint64_t*, const UnpackOptions&);
6968

7069
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_benchmark.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ namespace arrow::internal {
3434
namespace {
3535

3636
template <typename Int>
37-
using UnpackFunc = void (*)(const uint8_t*, Int*, int, int, int);
37+
using UnpackFunc = void (*)(const uint8_t*, Int*, const UnpackOptions&);
3838

3939
/// Get the number of bytes associated with a packing.
4040
constexpr int32_t GetNumBytes(int32_t num_values, int32_t bit_width) {
@@ -89,8 +89,15 @@ void BM_Unpack(benchmark::State& state, bool aligned, UnpackFunc<Int> unpack, bo
8989

9090
auto unpacked = std::make_unique<Int[]>(num_values);
9191

92+
const ::arrow::internal::UnpackOptions opts{
93+
/* .batch_size= */ num_values,
94+
/* .bit_width= */ bit_width,
95+
/* .bit_offset= */ 0,
96+
/* .max_read_bytes= */ -1,
97+
};
98+
9299
for (auto _ : state) {
93-
unpack(packed_ptr, unpacked.get(), num_values, bit_width, /* bit_offset = */ 0);
100+
unpack(packed_ptr, unpacked.get(), opts);
94101
benchmark::ClobberMemory();
95102
}
96103
state.SetItemsProcessed(num_values * state.iterations());

cpp/src/arrow/util/bpacking_dispatch_internal.h

Lines changed: 274 additions & 136 deletions
Large diffs are not rendered by default.

cpp/src/arrow/util/bpacking_internal.h

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,29 @@
2323

2424
namespace arrow::internal {
2525

26+
struct UnpackOptions {
27+
int batch_size;
28+
int bit_width;
29+
int bit_offset = 0;
30+
int max_read_bytes = -1;
31+
};
32+
2633
template <typename Uint>
27-
ARROW_EXPORT void unpack(const uint8_t* in, Uint* out, int batch_size, int num_bits,
28-
int bit_offset = 0);
34+
ARROW_EXPORT void unpack(const uint8_t* in, Uint* out, const UnpackOptions& opts);
2935

30-
extern template ARROW_TEMPLATE_EXPORT void unpack<bool>(const uint8_t* in, bool* out,
31-
int batch_size, int num_bits,
32-
int bit_offset);
36+
extern template ARROW_TEMPLATE_EXPORT void unpack<bool>( //
37+
const uint8_t* in, bool* out, const UnpackOptions& opts);
3338

34-
extern template ARROW_TEMPLATE_EXPORT void unpack<uint8_t>(const uint8_t* in,
35-
uint8_t* out, int batch_size,
36-
int num_bits, int bit_offset);
39+
extern template ARROW_TEMPLATE_EXPORT void unpack<uint8_t>( //
40+
const uint8_t* in, uint8_t* out, const UnpackOptions& opts);
3741

38-
extern template ARROW_TEMPLATE_EXPORT void unpack<uint16_t>(const uint8_t* in,
39-
uint16_t* out, int batch_size,
40-
int num_bits, int bit_offset);
42+
extern template ARROW_TEMPLATE_EXPORT void unpack<uint16_t>( //
43+
const uint8_t* in, uint16_t* out, const UnpackOptions& opts);
4144

42-
extern template ARROW_TEMPLATE_EXPORT void unpack<uint32_t>(const uint8_t* in,
43-
uint32_t* out, int batch_size,
44-
int num_bits, int bit_offset);
45+
extern template ARROW_TEMPLATE_EXPORT void unpack<uint32_t>( //
46+
const uint8_t* in, uint32_t* out, const UnpackOptions& opts);
4547

46-
extern template ARROW_TEMPLATE_EXPORT void unpack<uint64_t>(const uint8_t* in,
47-
uint64_t* out, int batch_size,
48-
int num_bits, int bit_offset);
48+
extern template ARROW_TEMPLATE_EXPORT void unpack<uint64_t>( //
49+
const uint8_t* in, uint64_t* out, const UnpackOptions& opts);
4950

5051
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_scalar.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,21 @@
1616
// under the License.
1717

1818
#include "arrow/util/bpacking_dispatch_internal.h"
19+
#include "arrow/util/bpacking_internal.h"
1920
#include "arrow/util/bpacking_scalar_generated_internal.h"
2021
#include "arrow/util/bpacking_scalar_internal.h"
2122

2223
namespace arrow::internal {
2324

2425
template <typename Uint>
25-
void unpack_scalar(const uint8_t* in, Uint* out, int batch_size, int num_bits,
26-
int bit_offset) {
27-
return unpack_jump<ScalarUnpackerForWidth>(in, out, batch_size, num_bits, bit_offset);
26+
void unpack_scalar(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
27+
return unpack_jump<ScalarUnpackerForWidth>(in, out, opts);
2828
}
2929

30-
template void unpack_scalar<bool>(const uint8_t*, bool*, int, int, int);
31-
template void unpack_scalar<uint8_t>(const uint8_t*, uint8_t*, int, int, int);
32-
template void unpack_scalar<uint16_t>(const uint8_t*, uint16_t*, int, int, int);
33-
template void unpack_scalar<uint32_t>(const uint8_t*, uint32_t*, int, int, int);
34-
template void unpack_scalar<uint64_t>(const uint8_t*, uint64_t*, int, int, int);
30+
template void unpack_scalar<bool>(const uint8_t*, bool*, const UnpackOptions&);
31+
template void unpack_scalar<uint8_t>(const uint8_t*, uint8_t*, const UnpackOptions&);
32+
template void unpack_scalar<uint16_t>(const uint8_t*, uint16_t*, const UnpackOptions&);
33+
template void unpack_scalar<uint32_t>(const uint8_t*, uint32_t*, const UnpackOptions&);
34+
template void unpack_scalar<uint64_t>(const uint8_t*, uint64_t*, const UnpackOptions&);
3535

3636
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_scalar_internal.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,31 +17,29 @@
1717

1818
#pragma once
1919

20+
#include "arrow/util/bpacking_internal.h"
2021
#include "arrow/util/visibility.h"
2122

2223
#include <cstdint>
2324

2425
namespace arrow::internal {
2526

2627
template <typename Uint>
27-
ARROW_EXPORT void unpack_scalar(const uint8_t* in, Uint* out, int batch_size,
28-
int num_bits, int bit_offset);
28+
ARROW_EXPORT void unpack_scalar(const uint8_t* in, Uint* out, const UnpackOptions& opts);
2929

30-
extern template ARROW_TEMPLATE_EXPORT void unpack_scalar<bool>(const uint8_t* in,
31-
bool* out, int batch_size,
32-
int num_bits,
33-
int bit_offset);
30+
extern template ARROW_TEMPLATE_EXPORT void unpack_scalar<bool>( //
31+
const uint8_t* in, bool* out, const UnpackOptions& opts);
3432

3533
extern template ARROW_TEMPLATE_EXPORT void unpack_scalar<uint8_t>(
36-
const uint8_t* in, uint8_t* out, int batch_size, int num_bits, int bit_offset);
34+
const uint8_t* in, uint8_t* out, const UnpackOptions& opts);
3735

3836
extern template ARROW_TEMPLATE_EXPORT void unpack_scalar<uint16_t>(
39-
const uint8_t* in, uint16_t* out, int batch_size, int num_bits, int bit_offset);
37+
const uint8_t* in, uint16_t* out, const UnpackOptions& opts);
4038

4139
extern template ARROW_TEMPLATE_EXPORT void unpack_scalar<uint32_t>(
42-
const uint8_t* in, uint32_t* out, int batch_size, int num_bits, int bit_offset);
40+
const uint8_t* in, uint32_t* out, const UnpackOptions& opts);
4341

4442
extern template ARROW_TEMPLATE_EXPORT void unpack_scalar<uint64_t>(
45-
const uint8_t* in, uint64_t* out, int batch_size, int num_bits, int bit_offset);
43+
const uint8_t* in, uint64_t* out, const UnpackOptions& opts);
4644

4745
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_simd_avx2.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
#include "arrow/util/bpacking_dispatch_internal.h"
19+
#include "arrow/util/bpacking_internal.h"
1920
#include "arrow/util/bpacking_simd_internal.h"
2021
#include "arrow/util/bpacking_simd_kernel_internal.h"
2122

@@ -25,15 +26,14 @@ template <typename UnpackedUint, int kPackedBitSize>
2526
using Simd256Kernel = Kernel<UnpackedUint, kPackedBitSize, 256>;
2627

2728
template <typename Uint>
28-
void unpack_avx2(const uint8_t* in, Uint* out, int batch_size, int num_bits,
29-
int bit_offset) {
30-
return unpack_jump<Simd256Kernel>(in, out, batch_size, num_bits, bit_offset);
29+
void unpack_avx2(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
30+
return unpack_jump<Simd256Kernel>(in, out, opts);
3131
}
3232

33-
template void unpack_avx2<bool>(const uint8_t*, bool*, int, int, int);
34-
template void unpack_avx2<uint8_t>(const uint8_t*, uint8_t*, int, int, int);
35-
template void unpack_avx2<uint16_t>(const uint8_t*, uint16_t*, int, int, int);
36-
template void unpack_avx2<uint32_t>(const uint8_t*, uint32_t*, int, int, int);
37-
template void unpack_avx2<uint64_t>(const uint8_t*, uint64_t*, int, int, int);
33+
template void unpack_avx2<bool>(const uint8_t*, bool*, const UnpackOptions&);
34+
template void unpack_avx2<uint8_t>(const uint8_t*, uint8_t*, const UnpackOptions&);
35+
template void unpack_avx2<uint16_t>(const uint8_t*, uint16_t*, const UnpackOptions&);
36+
template void unpack_avx2<uint32_t>(const uint8_t*, uint32_t*, const UnpackOptions&);
37+
template void unpack_avx2<uint64_t>(const uint8_t*, uint64_t*, const UnpackOptions&);
3838

3939
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_simd_avx512.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,21 @@
1616
// under the License.
1717

1818
#include "arrow/util/bpacking_dispatch_internal.h"
19+
#include "arrow/util/bpacking_internal.h"
1920
#include "arrow/util/bpacking_simd512_generated_internal.h"
2021
#include "arrow/util/bpacking_simd_internal.h"
2122

2223
namespace arrow::internal {
2324

2425
template <typename Uint>
25-
void unpack_avx512(const uint8_t* in, Uint* out, int batch_size, int num_bits,
26-
int bit_offset) {
27-
return unpack_jump<Simd512UnpackerForWidth>(in, out, batch_size, num_bits, bit_offset);
26+
void unpack_avx512(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
27+
return unpack_jump<Simd512UnpackerForWidth>(in, out, opts);
2828
}
2929

30-
template void unpack_avx512<bool>(const uint8_t*, bool*, int, int, int);
31-
template void unpack_avx512<uint8_t>(const uint8_t*, uint8_t*, int, int, int);
32-
template void unpack_avx512<uint16_t>(const uint8_t*, uint16_t*, int, int, int);
33-
template void unpack_avx512<uint32_t>(const uint8_t*, uint32_t*, int, int, int);
34-
template void unpack_avx512<uint64_t>(const uint8_t*, uint64_t*, int, int, int);
30+
template void unpack_avx512<bool>(const uint8_t*, bool*, const UnpackOptions&);
31+
template void unpack_avx512<uint8_t>(const uint8_t*, uint8_t*, const UnpackOptions&);
32+
template void unpack_avx512<uint16_t>(const uint8_t*, uint16_t*, const UnpackOptions&);
33+
template void unpack_avx512<uint32_t>(const uint8_t*, uint32_t*, const UnpackOptions&);
34+
template void unpack_avx512<uint64_t>(const uint8_t*, uint64_t*, const UnpackOptions&);
3535

3636
} // namespace arrow::internal

cpp/src/arrow/util/bpacking_simd_default.cc

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,15 @@ template <typename UnpackedUint, int kPackedBitSize>
3333
using Simd128Kernel = Kernel<UnpackedUint, kPackedBitSize, 128>;
3434

3535
template <typename Uint>
36-
void UNPACK_PLATFORM(const uint8_t* in, Uint* out, int batch_size, int num_bits,
37-
int bit_offset) {
38-
return unpack_jump<Simd128Kernel>(in, out, batch_size, num_bits, bit_offset);
36+
void UNPACK_PLATFORM(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
37+
return unpack_jump<Simd128Kernel>(in, out, opts);
3938
}
4039

41-
template void UNPACK_PLATFORM<bool>(const uint8_t*, bool*, int, int, int);
42-
template void UNPACK_PLATFORM<uint8_t>(const uint8_t*, uint8_t*, int, int, int);
43-
template void UNPACK_PLATFORM<uint16_t>(const uint8_t*, uint16_t*, int, int, int);
44-
template void UNPACK_PLATFORM<uint32_t>(const uint8_t*, uint32_t*, int, int, int);
45-
template void UNPACK_PLATFORM<uint64_t>(const uint8_t*, uint64_t*, int, int, int);
40+
template void UNPACK_PLATFORM<bool>(const uint8_t*, bool*, const UnpackOptions&);
41+
template void UNPACK_PLATFORM<uint8_t>(const uint8_t*, uint8_t*, const UnpackOptions&);
42+
template void UNPACK_PLATFORM<uint16_t>(const uint8_t*, uint16_t*, const UnpackOptions&);
43+
template void UNPACK_PLATFORM<uint32_t>(const uint8_t*, uint32_t*, const UnpackOptions&);
44+
template void UNPACK_PLATFORM<uint64_t>(const uint8_t*, uint64_t*, const UnpackOptions&);
4645

4746
} // namespace arrow::internal
4847

0 commit comments

Comments
 (0)