Skip to content

Commit 629c6a1

Browse files
westonpace authored and pitrou committed
GH-36641: [C++] Remove reference to acero from non-acero file (#36650)
### Rationale for this change Files in modules which do not depend on the acero module should not reference files inside the acero module. ### What changes are included in this PR? There were no changes to the body of any functions. I simply moved functions around so that the acero include was no longer needed. There were some conflicts that arose between the class `bit_util` and the namespace `bit_util` and so I got rid of the class in favor of the namespace as that is more similar to how we handle `bit_util` elsewhere. ### Are these changes tested? Sort of. I would like to add an AVX2 CI system as well. I'm not confident any of the CI builds are building with AVX2 enabled. Also, even if we have an AVX2 CI system it would not have caught this issue since the code only needed definitions from the acero header and was not relying on any actual compiled symbols. However, I think setting up tests to catch this sort of invalid include is beyond the scope of this PR. ### Are there any user-facing changes? No. * Closes: #36641 Lead-authored-by: Weston Pace <[email protected]> Co-authored-by: Antoine Pitrou <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
1 parent e77e13a commit 629c6a1

File tree

3 files changed

+99
-119
lines changed

3 files changed

+99
-119
lines changed

cpp/src/arrow/compute/util.cc

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ void TempVectorStack::release(int id, uint32_t num_bytes) {
5656
--num_vectors_;
5757
}
5858

59-
inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
59+
namespace bit_util {
60+
61+
inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
6062
// This will not be correct on big-endian architectures.
6163
#if !ARROW_LITTLE_ENDIAN
6264
ARROW_DCHECK(false);
@@ -73,7 +75,7 @@ inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes
7375
}
7476
}
7577

76-
inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) {
78+
inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) {
7779
// This will not be correct on big-endian architectures.
7880
#if !ARROW_LITTLE_ENDIAN
7981
ARROW_DCHECK(false);
@@ -88,8 +90,8 @@ inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_
8890
}
8991
}
9092

91-
inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t base_index,
92-
int* num_indexes, uint16_t* indexes) {
93+
inline void bits_to_indexes_helper(uint64_t word, uint16_t base_index, int* num_indexes,
94+
uint16_t* indexes) {
9395
int n = *num_indexes;
9496
while (word) {
9597
indexes[n++] = base_index + static_cast<uint16_t>(CountTrailingZeros(word));
@@ -98,9 +100,8 @@ inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t base_index,
98100
*num_indexes = n;
99101
}
100102

101-
inline void bit_util::bits_filter_indexes_helper(uint64_t word,
102-
const uint16_t* input_indexes,
103-
int* num_indexes, uint16_t* indexes) {
103+
inline void bits_filter_indexes_helper(uint64_t word, const uint16_t* input_indexes,
104+
int* num_indexes, uint16_t* indexes) {
104105
int n = *num_indexes;
105106
while (word) {
106107
indexes[n++] = input_indexes[CountTrailingZeros(word)];
@@ -110,21 +111,21 @@ inline void bit_util::bits_filter_indexes_helper(uint64_t word,
110111
}
111112

112113
template <int bit_to_search, bool filter_input_indexes>
113-
void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
114-
const uint8_t* bits,
115-
const uint16_t* input_indexes, int* num_indexes,
116-
uint16_t* indexes, uint16_t base_index) {
114+
void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
115+
const uint8_t* bits, const uint16_t* input_indexes,
116+
int* num_indexes, uint16_t* indexes,
117+
uint16_t base_index = 0) {
117118
// 64 bits at a time
118119
constexpr int unroll = 64;
119120
int tail = num_bits % unroll;
120121
#if defined(ARROW_HAVE_AVX2)
121122
if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
122123
if (filter_input_indexes) {
123-
bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes,
124-
num_indexes, indexes);
124+
avx2::bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes,
125+
num_indexes, indexes);
125126
} else {
126-
bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes,
127-
base_index);
127+
avx2::bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes,
128+
indexes, base_index);
128129
}
129130
} else {
130131
#endif
@@ -160,9 +161,9 @@ void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int num_bi
160161
}
161162
}
162163

163-
void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits,
164-
const uint8_t* bits, int* num_indexes, uint16_t* indexes,
165-
int bit_offset) {
164+
void bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits,
165+
const uint8_t* bits, int* num_indexes, uint16_t* indexes,
166+
int bit_offset) {
166167
bits += bit_offset / 8;
167168
bit_offset %= 8;
168169
*num_indexes = 0;
@@ -193,10 +194,9 @@ void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int nu
193194
*num_indexes += num_indexes_new;
194195
}
195196

196-
void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
197-
const int num_bits, const uint8_t* bits,
198-
const uint16_t* input_indexes, int* num_indexes,
199-
uint16_t* indexes, int bit_offset) {
197+
void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits,
198+
const uint8_t* bits, const uint16_t* input_indexes,
199+
int* num_indexes, uint16_t* indexes, int bit_offset) {
200200
bits += bit_offset / 8;
201201
bit_offset %= 8;
202202
if (bit_offset != 0) {
@@ -226,19 +226,18 @@ void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
226226
}
227227
}
228228

229-
void bit_util::bits_split_indexes(int64_t hardware_flags, const int num_bits,
230-
const uint8_t* bits, int* num_indexes_bit0,
231-
uint16_t* indexes_bit0, uint16_t* indexes_bit1,
232-
int bit_offset) {
229+
void bits_split_indexes(int64_t hardware_flags, const int num_bits, const uint8_t* bits,
230+
int* num_indexes_bit0, uint16_t* indexes_bit0,
231+
uint16_t* indexes_bit1, int bit_offset) {
233232
bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0,
234233
bit_offset);
235234
int num_indexes_bit1;
236235
bits_to_indexes(1, hardware_flags, num_bits, bits, &num_indexes_bit1, indexes_bit1,
237236
bit_offset);
238237
}
239238

240-
void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits,
241-
const uint8_t* bits, uint8_t* bytes, int bit_offset) {
239+
void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bits,
240+
uint8_t* bytes, int bit_offset) {
242241
bits += bit_offset / 8;
243242
bit_offset %= 8;
244243
if (bit_offset != 0) {
@@ -258,7 +257,7 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits,
258257
if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
259258
// The function call below processes whole 32 bit chunks together.
260259
num_processed = num_bits - (num_bits % 32);
261-
bits_to_bytes_avx2(num_processed, bits, bytes);
260+
avx2::bits_to_bytes_avx2(num_processed, bits, bytes);
262261
}
263262
#endif
264263
// Processing 8 bits at a time
@@ -290,8 +289,8 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits,
290289
}
291290
}
292291

293-
void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits,
294-
const uint8_t* bytes, uint8_t* bits, int bit_offset) {
292+
void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* bytes,
293+
uint8_t* bits, int bit_offset) {
295294
bits += bit_offset / 8;
296295
bit_offset %= 8;
297296
if (bit_offset != 0) {
@@ -314,7 +313,7 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits,
314313
if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
315314
// The function call below processes whole 32 bit chunks together.
316315
num_processed = num_bits - (num_bits % 32);
317-
bytes_to_bits_avx2(num_processed, bytes, bits);
316+
avx2::bytes_to_bits_avx2(num_processed, bytes, bits);
318317
}
319318
#endif
320319
// Process 8 bits at a time
@@ -338,11 +337,11 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits,
338337
}
339338
}
340339

341-
bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
342-
uint32_t num_bytes) {
340+
bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
341+
uint32_t num_bytes) {
343342
#if defined(ARROW_HAVE_AVX2)
344343
if (hardware_flags & arrow::internal::CpuInfo::AVX2) {
345-
return are_all_bytes_zero_avx2(bytes, num_bytes);
344+
return avx2::are_all_bytes_zero_avx2(bytes, num_bytes);
346345
}
347346
#endif
348347
uint64_t result_or = 0;
@@ -358,6 +357,7 @@ bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
358357
return result_or == 0;
359358
}
360359

360+
} // namespace bit_util
361361
} // namespace util
362362

363363
} // namespace arrow

cpp/src/arrow/compute/util.h

Lines changed: 36 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -139,69 +139,55 @@ class TempVectorHolder {
139139
uint32_t num_elements_;
140140
};
141141

142-
class ARROW_EXPORT bit_util {
143-
public:
144-
static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
145-
const int num_bits, const uint8_t* bits, int* num_indexes,
146-
uint16_t* indexes, int bit_offset = 0);
142+
namespace bit_util {
147143

148-
static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
144+
ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
149145
const int num_bits, const uint8_t* bits,
150-
const uint16_t* input_indexes, int* num_indexes,
151-
uint16_t* indexes, int bit_offset = 0);
146+
int* num_indexes, uint16_t* indexes,
147+
int bit_offset = 0);
152148

153-
// Input and output indexes may be pointing to the same data (in-place filtering).
154-
static void bits_split_indexes(int64_t hardware_flags, const int num_bits,
155-
const uint8_t* bits, int* num_indexes_bit0,
156-
uint16_t* indexes_bit0, uint16_t* indexes_bit1,
157-
int bit_offset = 0);
149+
ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
150+
const int num_bits, const uint8_t* bits,
151+
const uint16_t* input_indexes, int* num_indexes,
152+
uint16_t* indexes, int bit_offset = 0);
158153

159-
// Bit 1 is replaced with byte 0xFF.
160-
static void bits_to_bytes(int64_t hardware_flags, const int num_bits,
161-
const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
154+
// Input and output indexes may be pointing to the same data (in-place filtering).
155+
ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits,
156+
const uint8_t* bits, int* num_indexes_bit0,
157+
uint16_t* indexes_bit0, uint16_t* indexes_bit1,
158+
int bit_offset = 0);
162159

163-
// Return highest bit of each byte.
164-
static void bytes_to_bits(int64_t hardware_flags, const int num_bits,
165-
const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
160+
// Bit 1 is replaced with byte 0xFF.
161+
ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits,
162+
const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);
166163

167-
static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
168-
uint32_t num_bytes);
164+
// Return highest bit of each byte.
165+
ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits,
166+
const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);
169167

170-
private:
171-
inline static uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes);
172-
inline static void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value);
173-
inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index,
174-
int* num_indexes, uint16_t* indexes);
175-
inline static void bits_filter_indexes_helper(uint64_t word,
176-
const uint16_t* input_indexes,
177-
int* num_indexes, uint16_t* indexes);
178-
template <int bit_to_search, bool filter_input_indexes>
179-
static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
180-
const uint8_t* bits, const uint16_t* input_indexes,
181-
int* num_indexes, uint16_t* indexes,
182-
uint16_t base_index = 0);
168+
ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
169+
uint32_t num_bytes);
183170

184171
#if defined(ARROW_HAVE_AVX2)
185-
static void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
186-
const uint8_t* bits, int* num_indexes,
187-
uint16_t* indexes, uint16_t base_index = 0);
188-
static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
189-
const uint8_t* bits, const uint16_t* input_indexes,
190-
int* num_indexes, uint16_t* indexes);
191-
template <int bit_to_search>
192-
static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
193-
int* num_indexes, uint16_t* indexes,
194-
uint16_t base_index = 0);
195-
template <int bit_to_search>
196-
static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
172+
173+
namespace avx2 {
174+
ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
175+
const uint8_t* bits,
197176
const uint16_t* input_indexes,
198177
int* num_indexes, uint16_t* indexes);
199-
static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes);
200-
static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits);
201-
static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
178+
ARROW_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
179+
const uint8_t* bits, int* num_indexes,
180+
uint16_t* indexes, uint16_t base_index = 0);
181+
ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
182+
uint8_t* bytes);
183+
ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
184+
uint8_t* bits);
185+
ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
186+
} // namespace avx2
187+
202188
#endif
203-
};
204189

190+
} // namespace bit_util
205191
} // namespace util
206192

207193
namespace compute {

cpp/src/arrow/compute/util_avx2.cc

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,18 @@
1616
// under the License.
1717

1818
#include <immintrin.h>
19+
#include <cstring>
1920

20-
#include "arrow/acero/util.h"
2121
#include "arrow/util/bit_util.h"
22-
23-
namespace arrow {
24-
namespace util {
22+
#include "arrow/util/logging.h"
2523

2624
#if defined(ARROW_HAVE_AVX2)
2725

28-
void bit_util::bits_to_indexes_avx2(int bit_to_search, const int num_bits,
29-
const uint8_t* bits, int* num_indexes,
30-
uint16_t* indexes, uint16_t base_index) {
31-
if (bit_to_search == 0) {
32-
bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, base_index);
33-
} else {
34-
ARROW_DCHECK(bit_to_search == 1);
35-
bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, base_index);
36-
}
37-
}
26+
namespace arrow::util::avx2 {
3827

3928
template <int bit_to_search>
40-
void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
41-
int* num_indexes, uint16_t* indexes,
42-
uint16_t base_index) {
29+
void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, int* num_indexes,
30+
uint16_t* indexes, uint16_t base_index = 0) {
4331
// 64 bits at a time
4432
constexpr int unroll = 64;
4533

@@ -82,21 +70,20 @@ void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
8270
}
8371
}
8472

85-
void bit_util::bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
86-
const uint8_t* bits,
87-
const uint16_t* input_indexes, int* num_indexes,
88-
uint16_t* indexes) {
73+
void bits_to_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits,
74+
int* num_indexes, uint16_t* indexes, uint16_t base_index) {
8975
if (bit_to_search == 0) {
90-
bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes);
76+
bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, base_index);
9177
} else {
92-
bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes);
78+
ARROW_DCHECK(bit_to_search == 1);
79+
bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, base_index);
9380
}
9481
}
9582

9683
template <int bit_to_search>
97-
void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
98-
const uint16_t* input_indexes,
99-
int* out_num_indexes, uint16_t* indexes) {
84+
void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits,
85+
const uint16_t* input_indexes, int* out_num_indexes,
86+
uint16_t* indexes) {
10087
// 64 bits at a time
10188
constexpr int unroll = 64;
10289

@@ -167,8 +154,17 @@ void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* b
167154
*out_num_indexes = num_indexes;
168155
}
169156

170-
void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
171-
uint8_t* bytes) {
157+
void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits,
158+
const uint16_t* input_indexes, int* num_indexes,
159+
uint16_t* indexes) {
160+
if (bit_to_search == 0) {
161+
bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes);
162+
} else {
163+
bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes);
164+
}
165+
}
166+
167+
void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes) {
172168
constexpr int unroll = 32;
173169

174170
constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL;
@@ -188,8 +184,7 @@ void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
188184
}
189185
}
190186

191-
void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
192-
uint8_t* bits) {
187+
void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits) {
193188
constexpr int unroll = 32;
194189
// Processing 32 bits at a time
195190
for (int i = 0; i < num_bits / unroll; ++i) {
@@ -198,7 +193,7 @@ void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
198193
}
199194
}
200195

201-
bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) {
196+
bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) {
202197
__m256i result_or = _mm256_setzero_si256();
203198
uint32_t i;
204199
for (i = 0; i < num_bytes / 32; ++i) {
@@ -216,7 +211,6 @@ bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes)
216211
return result_or32 == 0;
217212
}
218213

219-
#endif // ARROW_HAVE_AVX2
214+
} // namespace arrow::util::avx2
220215

221-
} // namespace util
222-
} // namespace arrow
216+
#endif // ARROW_HAVE_AVX2

0 commit comments

Comments
 (0)