apache · Vishwanatha-HD · Nov 19, 2025 · Nov 19, 2025 · Dec 10, 2025 · kou
diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
@@ -30,33 +30,33 @@ namespace util {
 namespace bit_util {
 
 inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
-  // This will not be correct on big-endian architectures.
-#if !ARROW_LITTLE_ENDIAN
-  ARROW_DCHECK(false);
-#endif
   ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
   if (num_bytes == 8) {
     return util::SafeLoad(reinterpret_cast<const uint64_t*>(bytes));
   } else {
     uint64_t word = 0;
     for (int i = 0; i < num_bytes; ++i) {
+#if ARROW_LITTLE_ENDIAN  // bytes[i] occupies bits [8*i, 8*i+8) of the result
       word |= static_cast<uint64_t>(bytes[i]) << (8 * i);
+#else  // bytes[0] becomes the top byte, so callers' ByteSwap moves it to the low byte
+      word |= static_cast<uint64_t>(bytes[i]) << (8 * (7 - i));
+#endif
     }
     return word;
   }
 }
 
 inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) {
-  // This will not be correct on big-endian architectures.
-#if !ARROW_LITTLE_ENDIAN
-  ARROW_DCHECK(false);
-#endif
   ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
   if (num_bytes == 8) {
     util::SafeStore(reinterpret_cast<uint64_t*>(bytes), value);
   } else {
     for (int i = 0; i < num_bytes; ++i) {
+#if ARROW_LITTLE_ENDIAN  // bytes[i] receives bits [8*i, 8*i+8) of value
       bytes[i] = static_cast<uint8_t>(value >> (8 * i));
+#else  // writes the low num_bytes bytes of value, most significant of them first
+      bytes[i] = static_cast<uint8_t>(value >> (8 * (num_bytes - 1 - i)));
+#endif  // NOTE(review): confirm big-endian callers want this, not a SafeStore prefix
     }
   }
 }
@@ -103,6 +103,9 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
     *num_indexes = 0;
     for (int i = 0; i < num_bits / unroll; ++i) {
       uint64_t word = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bits)[i]);
+#if !ARROW_LITTLE_ENDIAN
+      word = ::arrow::bit_util::ByteSwap(word);
+#endif
       if (bit_to_search == 0) {
         word = ~word;
       }
@@ -119,6 +122,9 @@ void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits,
   if (tail) {
     const uint8_t* bits_tail = bits + (num_bits - tail) / 8;
     uint64_t word = SafeLoadUpTo8Bytes(bits_tail, (tail + 7) / 8);
+#if !ARROW_LITTLE_ENDIAN
+    word = ::arrow::bit_util::ByteSwap(word);
+#endif
     if (bit_to_search == 0) {
       word = ~word;
     }
@@ -291,6 +297,9 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by
   constexpr int unroll = 8;
   for (int i = num_processed / unroll; i < num_bits / unroll; ++i) {
     uint64_t bytes_next = util::SafeLoad(&reinterpret_cast<const uint64_t*>(bytes)[i]);
+#if !ARROW_LITTLE_ENDIAN
+    bytes_next = ::arrow::bit_util::ByteSwap(bytes_next);
+#endif
     bytes_next &= 0x0101010101010101ULL;
     bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
     bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes
@@ -300,6 +309,9 @@ void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* by
   int tail = num_bits % unroll;
   if (tail) {
     uint64_t bytes_next = SafeLoadUpTo8Bytes(bytes + num_bits - tail, tail);
+#if !ARROW_LITTLE_ENDIAN
+    bytes_next = ::arrow::bit_util::ByteSwap(bytes_next);
+#endif
     bytes_next &= 0x0101010101010101ULL;
     bytes_next |= (bytes_next >> 7);  // Pairs of adjacent output bits in individual bytes
     bytes_next |= (bytes_next >> 14);  // 4 adjacent output bits in individual bytes