@@ -86,6 +86,7 @@ using arrow::Table;
8686using arrow::TimestampArray;
8787
8888using ::arrow::bit_util::FromBigEndian;
89+ using ::arrow::bit_util::ToBigEndian;
8990using ::arrow::internal::checked_cast;
9091using ::arrow::internal::checked_pointer_cast;
9192using ::arrow::internal::SafeLeftShift;
@@ -108,6 +109,62 @@ namespace {
108109template <typename ArrowType>
109110using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
110111
112+ template <typename DecimalType>
113+ Result<std::shared_ptr<::arrow::Scalar>> DecimalScalarFromBigEndianBytes (
114+ std::string_view data, std::shared_ptr<DataType> arrow_type) {
115+ ARROW_ASSIGN_OR_RAISE (
116+ DecimalType decimal,
117+ DecimalType::FromBigEndian (reinterpret_cast <const uint8_t *>(data.data ()),
118+ static_cast <int32_t >(data.size ())));
119+ return ::arrow::MakeScalar (std::move (arrow_type), decimal);
120+ }
121+
122+ // Extract Min and Max scalars from big-endian representation of Decimals.
123+ Status ExtractDecimalMinMaxFromBytes (std::string_view min_bytes,
124+ std::string_view max_bytes,
125+ const LogicalType& logical_type,
126+ std::shared_ptr<::arrow::Scalar>* min,
127+ std::shared_ptr<::arrow::Scalar>* max) {
128+ const DecimalLogicalType& decimal_type =
129+ checked_cast<const DecimalLogicalType&>(logical_type);
130+
131+ Result<std::shared_ptr<DataType>> maybe_type =
132+ Decimal128Type::Make (decimal_type.precision (), decimal_type.scale ());
133+ std::shared_ptr<DataType> arrow_type;
134+ if (maybe_type.ok ()) {
135+ arrow_type = maybe_type.ValueOrDie ();
136+ ARROW_ASSIGN_OR_RAISE (
137+ *min, DecimalScalarFromBigEndianBytes<Decimal128>(min_bytes, arrow_type));
138+ ARROW_ASSIGN_OR_RAISE (*max, DecimalScalarFromBigEndianBytes<Decimal128>(
139+ max_bytes, std::move (arrow_type)));
140+ return Status::OK ();
141+ }
142+ // Fallback to see if Decimal256 can represent the type.
143+ ARROW_ASSIGN_OR_RAISE (
144+ arrow_type, Decimal256Type::Make (decimal_type.precision (), decimal_type.scale ()));
145+ ARROW_ASSIGN_OR_RAISE (
146+ *min, DecimalScalarFromBigEndianBytes<Decimal256>(min_bytes, arrow_type));
147+ ARROW_ASSIGN_OR_RAISE (*max, DecimalScalarFromBigEndianBytes<Decimal256>(
148+ max_bytes, std::move (arrow_type)));
149+
150+ return Status::OK ();
151+ }
152+
153+ template <typename Int>
154+ Status ExtractDecimalMinMaxFromInteger (Int min_value, Int max_value,
155+ const LogicalType& logical_type,
156+ std::shared_ptr<::arrow::Scalar>* min,
157+ std::shared_ptr<::arrow::Scalar>* max) {
158+ static_assert (std::is_integral_v<Int>);
159+ const Int min_be = ToBigEndian (min_value);
160+ const Int max_be = ToBigEndian (max_value);
161+ const auto min_bytes =
162+ std::string_view (reinterpret_cast <const char *>(&min_be), sizeof (min_be));
163+ const auto max_bytes =
164+ std::string_view (reinterpret_cast <const char *>(&max_be), sizeof (max_be));
165+ return ExtractDecimalMinMaxFromBytes (min_bytes, max_bytes, logical_type, min, max);
166+ }
167+
111168template <typename CType, typename StatisticsType>
112169Status MakeMinMaxScalar (const StatisticsType& statistics,
113170 std::shared_ptr<::arrow::Scalar>* min,
@@ -165,17 +222,19 @@ static Status FromInt32Statistics(const Int32Statistics& statistics,
165222 switch (logical_type.type ()) {
166223 case LogicalType::Type::INT:
167224 return MakeMinMaxIntegralScalar (statistics, *type, min, max);
168- break ;
169225 case LogicalType::Type::DATE:
170226 case LogicalType::Type::TIME:
171227 case LogicalType::Type::NONE:
172228 return MakeMinMaxTypedScalar<int32_t >(statistics, type, min, max);
173- break ;
229+ case LogicalType::Type::DECIMAL:
230+ return ExtractDecimalMinMaxFromInteger (statistics.min (), statistics.max (),
231+ logical_type, min, max);
174232 default :
175233 break ;
176234 }
177235
178- return Status::NotImplemented (" Cannot extract statistics for type " );
236+ return Status::NotImplemented (" Cannot extract statistics for INT32 with logical type " ,
237+ logical_type.ToString ());
179238}
180239
181240static Status FromInt64Statistics (const Int64Statistics& statistics,
@@ -188,66 +247,28 @@ static Status FromInt64Statistics(const Int64Statistics& statistics,
188247 switch (logical_type.type ()) {
189248 case LogicalType::Type::INT:
190249 return MakeMinMaxIntegralScalar (statistics, *type, min, max);
191- break ;
192250 case LogicalType::Type::TIME:
193251 case LogicalType::Type::TIMESTAMP:
194252 case LogicalType::Type::NONE:
195253 return MakeMinMaxTypedScalar<int64_t >(statistics, type, min, max);
196- break ;
254+ case LogicalType::Type::DECIMAL:
255+ return ExtractDecimalMinMaxFromInteger (statistics.min (), statistics.max (),
256+ logical_type, min, max);
197257 default :
198258 break ;
199259 }
200260
201- return Status::NotImplemented (" Cannot extract statistics for type " );
202- }
203-
204- template <typename DecimalType>
205- Result<std::shared_ptr<::arrow::Scalar>> FromBigEndianString (
206- const std::string& data, std::shared_ptr<DataType> arrow_type) {
207- ARROW_ASSIGN_OR_RAISE (
208- DecimalType decimal,
209- DecimalType::FromBigEndian (reinterpret_cast <const uint8_t *>(data.data ()),
210- static_cast <int32_t >(data.size ())));
211- return ::arrow::MakeScalar (std::move (arrow_type), decimal);
212- }
213-
214- // Extracts Min and Max scalar from bytes like types (i.e. types where
215- // decimal is encoded as little endian.
216- Status ExtractDecimalMinMaxFromBytesType (const Statistics& statistics,
217- const LogicalType& logical_type,
218- std::shared_ptr<::arrow::Scalar>* min,
219- std::shared_ptr<::arrow::Scalar>* max) {
220- const DecimalLogicalType& decimal_type =
221- checked_cast<const DecimalLogicalType&>(logical_type);
222-
223- Result<std::shared_ptr<DataType>> maybe_type =
224- Decimal128Type::Make (decimal_type.precision (), decimal_type.scale ());
225- std::shared_ptr<DataType> arrow_type;
226- if (maybe_type.ok ()) {
227- arrow_type = maybe_type.ValueOrDie ();
228- ARROW_ASSIGN_OR_RAISE (
229- *min, FromBigEndianString<Decimal128>(statistics.EncodeMin (), arrow_type));
230- ARROW_ASSIGN_OR_RAISE (*max, FromBigEndianString<Decimal128>(statistics.EncodeMax (),
231- std::move (arrow_type)));
232- return Status::OK ();
233- }
234- // Fallback to see if Decimal256 can represent the type.
235- ARROW_ASSIGN_OR_RAISE (
236- arrow_type, Decimal256Type::Make (decimal_type.precision (), decimal_type.scale ()));
237- ARROW_ASSIGN_OR_RAISE (
238- *min, FromBigEndianString<Decimal256>(statistics.EncodeMin (), arrow_type));
239- ARROW_ASSIGN_OR_RAISE (*max, FromBigEndianString<Decimal256>(statistics.EncodeMax (),
240- std::move (arrow_type)));
241-
242- return Status::OK ();
261+ return Status::NotImplemented (" Cannot extract statistics for INT64 with logical type " ,
262+ logical_type.ToString ());
243263}
244264
245265Status ByteArrayStatisticsAsScalars (const Statistics& statistics,
246266 std::shared_ptr<::arrow::Scalar>* min,
247267 std::shared_ptr<::arrow::Scalar>* max) {
248268 auto logical_type = statistics.descr ()->logical_type ();
249269 if (logical_type->type () == LogicalType::Type::DECIMAL) {
250- return ExtractDecimalMinMaxFromBytesType (statistics, *logical_type, min, max);
270+ return ExtractDecimalMinMaxFromBytes (statistics.EncodeMin (), statistics.EncodeMax (),
271+ *logical_type, min, max);
251272 }
252273 std::shared_ptr<::arrow::DataType> type;
253274 if (statistics.descr ()->physical_type () == Type::FIXED_LEN_BYTE_ARRAY) {
0 commit comments