Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions cpp/src/arrow/compute/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,33 @@ namespace util {
namespace bit_util {

inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
// This will not be correct on big-endian architectures.
#if !ARROW_LITTLE_ENDIAN
ARROW_DCHECK(false);
#endif
ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
if (num_bytes == 8) {
return util::SafeLoad(reinterpret_cast<const uint64_t*>(bytes));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work on big-endian systems?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out. With the way we now handle tail_bytes and load the word data, we don't actually need to change the "SafeLoadUpTo8Bytes()" function. With the conditional compilation, this function will never be called on big-endian architectures.
I have reverted this change and tested it completely on s390x to confirm that all the tests pass. I have pushed a new commit; please share your review comments. Thanks.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we are not going to update this function for big-endian because it won't be called? If so, why don't we keep the above DCHECK(false)?

} else {
uint64_t word = 0;
for (int i = 0; i < num_bytes; ++i) {
#if ARROW_LITTLE_ENDIAN
word |= static_cast<uint64_t>(bytes[i]) << (8 * i);
#else
word |= static_cast<uint64_t>(bytes[i]) << (8 * (num_bytes - 1 - i));
#endif
}
return word;
}
}

/// Store the low-order `num_bytes` bytes of `value` into the buffer at `bytes`.
///
/// \param bytes      destination buffer; at least `num_bytes` bytes are written
/// \param num_bytes  number of bytes to write, checked (debug) to be in [0, 8]
/// \param value      64-bit word supplying the bytes
///
/// With exactly 8 bytes the whole word is handed to util::SafeStore
/// (presumably an alignment-safe store -- confirm against its definition).
/// Otherwise bytes are written one at a time: least-significant byte first
/// when ARROW_LITTLE_ENDIAN is set, most-significant of the `num_bytes`
/// low-order bytes first when it is not.
///
/// NOTE(review): the big-endian guard below fires unconditionally in debug
/// builds even though a big-endian store order is implemented further down;
/// one of the two looks stale -- confirm which is intended.
inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) {
  // This will not be correct on big-endian architectures.
#if !ARROW_LITTLE_ENDIAN
  ARROW_DCHECK(false);
#endif
  ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);

  // Full-width case: a single 64-bit store.
  if (num_bytes == 8) {
    util::SafeStore(reinterpret_cast<uint64_t*>(bytes), value);
    return;
  }

  // Partial case: emit one byte per iteration, picking the shift that
  // matches the target byte order.
  for (int pos = 0; pos < num_bytes; ++pos) {
#if ARROW_LITTLE_ENDIAN
    const int shift = 8 * pos;
#else
    const int shift = 8 * (num_bytes - 1 - pos);
#endif
    bytes[pos] = static_cast<uint8_t>(value >> shift);
  }
}
Expand Down Expand Up @@ -103,6 +103,9 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
*num_indexes = 0;
for (int i = 0; i < num_bits / unroll; ++i) {
uint64_t word = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[i]);
#if !ARROW_LITTLE_ENDIAN
word = ::arrow::bit_util::ByteSwap(word);
#endif
if (bit_to_search == 0) {
word = ~word;
}
Expand All @@ -119,6 +122,9 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
if (tail) {
const uint8_t* bits_tail = bits + (num_bits - tail) / 8;
uint64_t word = SafeLoadUpTo8Bytes(bits_tail, (tail + 7) / 8);
#if !ARROW_LITTLE_ENDIAN
word = ::arrow::bit_util::ByteSwap(word);
#endif
if (bit_to_search == 0) {
word = ~word;
}
Expand Down Expand Up @@ -291,6 +297,9 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by
constexpr int unroll = 8;
for (int i = num_processed / unroll; i < num_bits / unroll; ++i) {
uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
#if !ARROW_LITTLE_ENDIAN
bytes_next = ::arrow::bit_util::ByteSwap(bytes_next);
#endif
bytes_next &= 0x0101010101010101ULL;
bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes
bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes
Expand All @@ -300,6 +309,9 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by
int tail = num_bits % unroll;
if (tail) {
uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
#if !ARROW_LITTLE_ENDIAN
bytes_next = ::arrow::bit_util::ByteSwap(bytes_next);
#endif
bytes_next &= 0x0101010101010101ULL;
bytes_next |= (bytes_next >> 7); // Pairs of adjacent output bits in individual bytes
bytes_next |= (bytes_next >> 14); // 4 adjacent output bits in individual bytes
Expand Down
Loading