|
27 | 27 | #include "arrow/util/checked_cast.h" |
28 | 28 | #include "arrow/util/compression.h" |
29 | 29 | #include "arrow/util/decimal.h" |
| 30 | +#include "arrow/util/endian.h" |
30 | 31 | #include "arrow/util/float16.h" |
31 | 32 | #include "arrow/util/logging_internal.h" |
32 | 33 |
|
@@ -112,6 +113,15 @@ std::enable_if_t<std::is_arithmetic_v<T>, std::string> FormatNumericValue( |
112 | 113 | std::stringstream result; |
113 | 114 | T value{}; |
114 | 115 | std::memcpy(&value, val.data(), sizeof(T)); |
| 116 | + |
| 117 | +#if !ARROW_LITTLE_ENDIAN |
| 118 | + // Parquet stores numeric stats in little-endian. Convert multi-byte integers |
| 119 | + // to native-endian before printing; other types are printed as copied. |
| 120 | + if constexpr (std::is_integral_v<T> && sizeof(T) > 1) { |
| 121 | + value = ::arrow::bit_util::FromLittleEndian(value); |
| 122 | + } |
| 123 | +#endif |
| 124 | + |
115 | 125 | result << value; |
116 | 126 | return result.str(); |
117 | 127 | } |
@@ -212,9 +222,12 @@ std::string FormatStatValue(Type::type parquet_type, ::std::string_view val, |
212 | 222 | return FormatNumericValue<float>(val); |
213 | 223 | } |
214 | 224 | case Type::INT96: { |
215 | | - std::array<int32_t, 3> values{}; |
216 | | - std::memcpy(values.data(), bytes, 3 * sizeof(int32_t)); |
217 | | - result << values[0] << " " << values[1] << " " << values[2]; |
| 225 | + std::array<uint32_t, 3> values{}; |
| 226 | + std::memcpy(values.data(), bytes, 3 * sizeof(uint32_t)); |
| 227 | + // INT96 values are stored in little-endian format |
| 228 | + result << ::arrow::bit_util::FromLittleEndian(values[0]) << " " |
| 229 | + << ::arrow::bit_util::FromLittleEndian(values[1]) << " " |
| 230 | + << ::arrow::bit_util::FromLittleEndian(values[2]); |
218 | 231 | break; |
219 | 232 | } |
220 | 233 | case Type::BYTE_ARRAY: |
|
0 commit comments