Skip to content

Commit 242c4e5

Browse files
GH-48208 Fix Types logic to enable Parquet DB support on s390x
1 parent d16ba00 commit 242c4e5

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed

cpp/src/parquet/types.cc

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "arrow/util/checked_cast.h"
2828
#include "arrow/util/compression.h"
2929
#include "arrow/util/decimal.h"
30+
#include "arrow/util/endian.h"
3031
#include "arrow/util/float16.h"
3132
#include "arrow/util/logging_internal.h"
3233

@@ -112,6 +113,15 @@ std::enable_if_t<std::is_arithmetic_v<T>, std::string> FormatNumericValue(
112113
std::stringstream result;
113114
T value{};
114115
std::memcpy(&value, val.data(), sizeof(T));
116+
117+
#if !ARROW_LITTLE_ENDIAN
118+
// Parquet stores numeric stats in little-endian. Convert to native-endian
119+
// before printing to avoid endianness mismatches on big-endian platforms.
120+
if constexpr (std::is_integral_v<T> && sizeof(T) > 1) {
121+
value = ::arrow::bit_util::FromLittleEndian(value);
122+
}
123+
#endif
124+
115125
result << value;
116126
return result.str();
117127
}
@@ -212,9 +222,12 @@ std::string FormatStatValue(Type::type parquet_type, ::std::string_view val,
212222
return FormatNumericValue<float>(val);
213223
}
214224
case Type::INT96: {
215-
std::array<int32_t, 3> values{};
216-
std::memcpy(values.data(), bytes, 3 * sizeof(int32_t));
217-
result << values[0] << " " << values[1] << " " << values[2];
225+
std::array<uint32_t, 3> values{};
226+
std::memcpy(values.data(), bytes, 3 * sizeof(uint32_t));
227+
// INT96 values are stored in little-endian format
228+
result << ::arrow::bit_util::FromLittleEndian(values[0]) << " "
229+
<< ::arrow::bit_util::FromLittleEndian(values[1]) << " "
230+
<< ::arrow::bit_util::FromLittleEndian(values[2]);
218231
break;
219232
}
220233
case Type::BYTE_ARRAY:

cpp/src/parquet/types.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <string>
2727
#include <string_view>
2828

29+
#include "arrow/util/endian.h"
2930
#include "parquet/platform.h"
3031
#include "parquet/type_fwd.h"
3132
#include "parquet/windows_fixup.h" // for OPTIONAL
@@ -705,7 +706,12 @@ static inline std::string ByteArrayToString(const ByteArray& a) {
705706
}
706707

707708
// Store `nanoseconds` into the first two 32-bit words of `i96`:
// value[0] receives the low 32 bits and value[1] the high 32 bits.
// value[2] is not modified.
//
// The words are assigned individually instead of copying the raw bytes of
// `nanoseconds`, so a single code path yields the same word layout on both
// little-endian and big-endian hosts and no preprocessor branch is needed.
// The shift is performed on an unsigned copy because right-shifting a
// negative signed value is implementation-defined before C++20.
static inline void Int96SetNanoSeconds(parquet::Int96& i96, int64_t nanoseconds) {
  const auto bits = static_cast<uint64_t>(nanoseconds);
  i96.value[0] = static_cast<uint32_t>(bits);
  i96.value[1] = static_cast<uint32_t>(bits >> 32);
}
710716

711717
struct DecodedInt96 {
@@ -720,7 +726,12 @@ static inline DecodedInt96 DecodeInt96Timestamp(const parquet::Int96& i96) {
720726
result.days_since_epoch = i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
721727
result.nanoseconds = 0;
722728

729+
#if ARROW_LITTLE_ENDIAN
723730
memcpy(&result.nanoseconds, &i96.value, sizeof(uint64_t));
731+
#else
732+
result.nanoseconds =
733+
static_cast<uint64_t>(i96.value[0]) | (static_cast<uint64_t>(i96.value[1]) << 32);
734+
#endif
724735
return result;
725736
}
726737

0 commit comments

Comments
 (0)