Skip to content

Commit a6f2bf9

Browse files
GH-48206 Fix Statistics logic to enable Parquet DB support on s390x
1 parent d16ba00 commit a6f2bf9

File tree

1 file changed

+80
-3
lines changed

1 file changed

+80
-3
lines changed

cpp/src/parquet/statistics.cc

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "arrow/type_traits.h"
3131
#include "arrow/util/bit_run_reader.h"
3232
#include "arrow/util/checked_cast.h"
33+
#include "arrow/util/endian.h"
3334
#include "arrow/util/float16.h"
3435
#include "arrow/util/logging_internal.h"
3536
#include "arrow/util/ubsan.h"
@@ -923,19 +924,95 @@ void TypedStatisticsImpl<DType>::UpdateSpaced(const T* values, const uint8_t* va
923924
valid_bits_offset));
924925
}
925926

927+
template <typename T>
928+
T ToLittleEndianValue(const T& value) {
929+
#if ARROW_LITTLE_ENDIAN
930+
return value;
931+
#else
932+
if constexpr (std::is_integral_v<T>) {
933+
return ::arrow::bit_util::ToLittleEndian(value);
934+
} else if constexpr (std::is_floating_point_v<T>) {
935+
using UInt = std::conditional_t<sizeof(T) == 4, uint32_t,
936+
std::conditional_t<sizeof(T) == 8, uint64_t, void>>;
937+
938+
UInt bits;
939+
std::memcpy(&bits, &value, sizeof(bits));
940+
bits = ::arrow::bit_util::ToLittleEndian(bits);
941+
942+
T out;
943+
std::memcpy(&out, &bits, sizeof(out));
944+
return out;
945+
} else {
946+
return value; // non-numeric types handled elsewhere
947+
}
948+
#endif
949+
}
950+
951+
template <typename T>
952+
T FromLittleEndianValue(const char* src, size_t src_size) {
953+
#if ARROW_LITTLE_ENDIAN
954+
T out{};
955+
std::memcpy(&out, src, std::min(src_size, sizeof(T)));
956+
return out;
957+
#else
958+
if constexpr (std::is_integral_v<T>) {
959+
T value{};
960+
std::memcpy(&value, src, std::min(src_size, sizeof(T)));
961+
return ::arrow::bit_util::FromLittleEndian(value);
962+
} else if constexpr (std::is_floating_point_v<T>) {
963+
using UInt = std::conditional_t<sizeof(T) == 4, uint32_t,
964+
std::conditional_t<sizeof(T) == 8, uint64_t, void>>;
965+
966+
UInt bits{};
967+
std::memcpy(&bits, src, std::min(src_size, sizeof(bits)));
968+
bits = ::arrow::bit_util::FromLittleEndian(bits);
969+
970+
T out;
971+
std::memcpy(&out, &bits, sizeof(out));
972+
return out;
973+
} else {
974+
T out{};
975+
std::memcpy(&out, src, std::min(src_size, sizeof(T)));
976+
return out;
977+
}
978+
#endif
979+
}
980+
981+
// True when the C++ value type of DType (DType::c_type) is a built-in
// arithmetic type, i.e. an integral or floating-point type.
template <typename DType>
constexpr bool kIsArithmeticType = std::is_arithmetic<typename DType::c_type>::value;
983+
926984
template <typename DType>
927985
void TypedStatisticsImpl<DType>::PlainEncode(const T& src, std::string* dst) const {
986+
using CType = typename DType::c_type;
987+
988+
// Fast path: fixed-width arithmetic types (int32/int64/float/double)
989+
if constexpr (kIsArithmeticType<DType>) {
990+
CType le_value = ToLittleEndianValue(src);
991+
dst->assign(reinterpret_cast<const char*>(&le_value), sizeof(le_value));
992+
return;
993+
}
994+
995+
// Generic fallback for non-arithmetic types
928996
auto encoder = MakeTypedEncoder<DType>(Encoding::PLAIN, false, descr_, pool_);
929997
encoder->Put(&src, 1);
930998
auto buffer = encoder->FlushValues();
931-
auto ptr = reinterpret_cast<const char*>(buffer->data());
932-
dst->assign(ptr, static_cast<size_t>(buffer->size()));
999+
dst->assign(reinterpret_cast<const char*>(buffer->data()),
1000+
static_cast<size_t>(buffer->size()));
9331001
}
9341002

9351003
template <typename DType>
9361004
void TypedStatisticsImpl<DType>::PlainDecode(const std::string& src, T* dst) const {
1005+
using CType = typename DType::c_type;
1006+
1007+
// Fast path: fixed-width arithmetic types
1008+
if constexpr (kIsArithmeticType<DType>) {
1009+
*dst = FromLittleEndianValue<CType>(src.data(), src.size());
1010+
return;
1011+
}
1012+
1013+
// Fallback for non-arithmetic types
9371014
auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
938-
decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()),
1015+
decoder->SetData(1, reinterpret_cast<const uint8_t*>(src.data()),
9391016
static_cast<int>(src.size()));
9401017
int decoded_values = decoder->Decode(dst, 1);
9411018
if (decoded_values != 1) {

0 commit comments

Comments
 (0)