From 91e3a72524bf4cf340dd73ef4f59b0c6c560a380 Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Wed, 2 Apr 2025 22:11:36 +0800 Subject: [PATCH 1/2] feat: sort order Signed-off-by: Junwang Zhao --- src/iceberg/CMakeLists.txt | 2 + src/iceberg/sort_field.cc | 85 +++++++++++++++++++++++++++++++++ src/iceberg/sort_field.h | 95 ++++++++++++++++++++++++++++++++++++ src/iceberg/sort_order.cc | 48 +++++++++++++++++++ src/iceberg/sort_order.h | 65 +++++++++++++++++++++++++ test/CMakeLists.txt | 4 +- test/sort_field_test.cc | 82 +++++++++++++++++++++++++++++++ test/sort_order_test.cc | 98 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 478 insertions(+), 1 deletion(-) create mode 100644 src/iceberg/sort_field.cc create mode 100644 src/iceberg/sort_field.h create mode 100644 src/iceberg/sort_order.cc create mode 100644 src/iceberg/sort_order.h create mode 100644 test/sort_field_test.cc create mode 100644 test/sort_order_test.cc diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index fec895240..321f5df42 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -25,6 +25,8 @@ set(ICEBERG_SOURCES schema_internal.cc partition_field.cc partition_spec.cc + sort_field.cc + sort_order.cc transform.cc type.cc) diff --git a/src/iceberg/sort_field.cc b/src/iceberg/sort_field.cc new file mode 100644 index 000000000..baa6a29de --- /dev/null +++ b/src/iceberg/sort_field.cc @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/sort_field.h" + +#include + +#include "iceberg/transform.h" +#include "iceberg/type.h" +#include "iceberg/util/formatter.h" + +namespace iceberg { + +namespace { +/// \brief Get the relative sort direction name +constexpr std::string_view ToString(SortDirection direction) { + switch (direction) { + case SortDirection::kAscending: + return "asc"; + case SortDirection::kDescending: + return "desc"; + default: + return "invalid"; + } +} + +/// \brief Get the relative null order name +constexpr std::string_view ToString(NullOrder null_order) { + switch (null_order) { + case NullOrder::kFirst: + return "nulls-first"; + case NullOrder::kLast: + return "nulls-last"; + default: + return "invalid"; + } +} +} // namespace + +SortField::SortField(int32_t source_id, std::shared_ptr transform, + SortDirection sort_direction, NullOrder null_order) + : source_id_(source_id), + transform_(std::move(transform)), + sort_direction_(sort_direction), + null_order_(null_order) {} + +int32_t SortField::source_id() const { return source_id_; } + +std::shared_ptr const& SortField::transform() const { + return transform_; +} + +SortDirection SortField::sort_direction() const { return sort_direction_; } + +NullOrder SortField::null_order() const { return null_order_; } + +std::string SortField::ToString() const { + return std::format( + "SortField(source_id={}, transform={}, sort_direction={}, null_order={})", + source_id_, *transform_, iceberg::ToString(sort_direction_), + iceberg::ToString(null_order_)); +} + +bool SortField::Equals(const 
SortField& other) const { + return source_id_ == other.source_id_ && *transform_ == *other.transform_ && + sort_direction_ == other.sort_direction_ && null_order_ == other.null_order_; +} + +} // namespace iceberg diff --git a/src/iceberg/sort_field.h b/src/iceberg/sort_field.h new file mode 100644 index 000000000..45ac9eecd --- /dev/null +++ b/src/iceberg/sort_field.h @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/sort_field.h +/// A sort field in a sort order + +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/type_fwd.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief Sort direction of a sort field, either ascending or descending +enum class SortDirection { + /// Ascending + kAscending, + /// Descending + kDescending, +}; + +enum class NullOrder { + /// Nulls are sorted first + kFirst, + /// Nulls are sorted last + kLast, +}; + +/// \brief A sort field: a source field ID, a transform, a direction and a null order. +class ICEBERG_EXPORT SortField : public util::Formattable { + public: + /// \brief Construct a field. + /// \param[in] source_id The source field ID. + /// \param[in] transform The transform function. 
+ /// \param[in] sort_direction The sort direction. + /// \param[in] null_order The null order. + SortField(int32_t source_id, std::shared_ptr transform, + SortDirection sort_direction, NullOrder null_order); + + /// \brief Get the source field ID. + int32_t source_id() const; + + /// \brief Get the transform function. + const std::shared_ptr& transform() const; + + /// \brief Get the sort direction. + SortDirection sort_direction() const; + + /// \brief Get the null order. + NullOrder null_order() const; + + std::string ToString() const override; + + friend bool operator==(const SortField& lhs, const SortField& rhs) { + return lhs.Equals(rhs); + } + + friend bool operator!=(const SortField& lhs, const SortField& rhs) { + return !(lhs == rhs); + } + + private: + /// \brief Compare two fields for equality. + [[nodiscard]] bool Equals(const SortField& other) const; + + int32_t source_id_; + std::shared_ptr transform_; + SortDirection sort_direction_; + NullOrder null_order_; +}; + +} // namespace iceberg diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc new file mode 100644 index 000000000..232d09941 --- /dev/null +++ b/src/iceberg/sort_order.cc @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/sort_order.h" + +#include + +#include "iceberg/util/formatter.h" + +namespace iceberg { + +SortOrder::SortOrder(int64_t order_id, std::vector fields) + : order_id_(order_id), fields_(std::move(fields)) {} + +int64_t SortOrder::order_id() const { return order_id_; } + +std::span SortOrder::fields() const { return fields_; } + +std::string SortOrder::ToString() const { + std::string repr = std::format("sort_order[order_id<{}>,\n", order_id_); + for (const auto& field : fields_) { + std::format_to(std::back_inserter(repr), " {}\n", field); + } + repr += "]"; + return repr; +} + +bool SortOrder::Equals(const SortOrder& other) const { + return order_id_ == other.order_id_ && fields_ == other.fields_; +} + +} // namespace iceberg diff --git a/src/iceberg/sort_order.h b/src/iceberg/sort_order.h new file mode 100644 index 000000000..049e86f72 --- /dev/null +++ b/src/iceberg/sort_order.h @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/sort_field.h" +#include "iceberg/util/formattable.h" + +namespace iceberg { + +/// \brief A sort order for a table +/// +/// A sort order is defined by a sort order id and a list of sort fields. +/// The order of the sort fields within the list defines the order in which the sort is +/// applied to the data. +class ICEBERG_EXPORT SortOrder : public util::Formattable { + public: + SortOrder(int64_t order_id, std::vector fields); + + /// \brief Get the sort order id. + int64_t order_id() const; + + /// \brief Get the list of sort fields. + std::span fields() const; + + std::string ToString() const override; + + friend bool operator==(const SortOrder& lhs, const SortOrder& rhs) { + return lhs.Equals(rhs); + } + + friend bool operator!=(const SortOrder& lhs, const SortOrder& rhs) { + return !(lhs == rhs); + } + + private: + /// \brief Compare two sort orders for equality. + bool Equals(const SortOrder& other) const; + + int64_t order_id_; + std::vector fields_; +}; + +} // namespace iceberg diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 96e319445..c516f99c5 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -30,7 +30,9 @@ target_sources(schema_test type_test.cc transform_test.cc partition_field_test.cc - partition_spec_test.cc) + partition_spec_test.cc + sort_field_test.cc + sort_order_test.cc) target_link_libraries(schema_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock) add_test(NAME schema_test COMMAND schema_test) diff --git a/test/sort_field_test.cc b/test/sort_field_test.cc new file mode 100644 index 000000000..2bac894c3 --- /dev/null +++ b/test/sort_field_test.cc @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/sort_field.h" + +#include + +#include + +#include "iceberg/transform.h" +#include "iceberg/util/formatter.h" + +namespace iceberg { + +namespace { +class TestTransformFunction : public TransformFunction { + public: + TestTransformFunction() : TransformFunction(TransformType::kUnknown) {} + expected Transform(const ArrowArray& input) override { + return unexpected( + Error{.kind = ErrorKind::kNotSupported, .message = "test transform function"}); + } +}; + +} // namespace + +TEST(SortFieldTest, Basics) { + { + const auto transform = std::make_shared(); + SortField field(1, transform, SortDirection::kAscending, NullOrder::kFirst); + EXPECT_EQ(1, field.source_id()); + EXPECT_EQ(*transform, *field.transform()); + EXPECT_EQ(SortDirection::kAscending, field.sort_direction()); + EXPECT_EQ(NullOrder::kFirst, field.null_order()); + EXPECT_EQ( + "SortField(source_id=1, transform=identity, sort_direction=asc, " + "null_order=nulls-first)", + field.ToString()); + EXPECT_EQ( + "SortField(source_id=1, transform=identity, sort_direction=asc, " + "null_order=nulls-first)", + std::format("{}", field)); + } +} + +TEST(SortFieldTest, Equality) { + auto test_transform = std::make_shared(); + auto identity_transform = std::make_shared(); + + SortField field1(1, test_transform, SortDirection::kAscending, NullOrder::kFirst); + SortField field2(2, test_transform, 
SortDirection::kAscending, NullOrder::kFirst); + SortField field3(1, identity_transform, SortDirection::kAscending, NullOrder::kFirst); + SortField field4(1, test_transform, SortDirection::kDescending, NullOrder::kFirst); + SortField field5(1, test_transform, SortDirection::kAscending, NullOrder::kLast); + + ASSERT_EQ(field1, field1); + ASSERT_NE(field1, field2); + ASSERT_NE(field2, field1); + ASSERT_NE(field1, field3); + ASSERT_NE(field3, field1); + ASSERT_NE(field1, field4); + ASSERT_NE(field4, field1); + ASSERT_NE(field1, field5); + ASSERT_NE(field5, field1); +} +} // namespace iceberg diff --git a/test/sort_order_test.cc b/test/sort_order_test.cc new file mode 100644 index 000000000..abbb8779c --- /dev/null +++ b/test/sort_order_test.cc @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "iceberg/sort_order.h" + +#include +#include + +#include + +#include "iceberg/schema.h" +#include "iceberg/sort_field.h" +#include "iceberg/transform.h" +#include "iceberg/util/formatter.h" + +namespace iceberg { + +namespace { +class TestTransformFunction : public TransformFunction { + public: + TestTransformFunction() : TransformFunction(TransformType::kUnknown) {} + expected Transform(const ArrowArray& input) override { + return unexpected( + Error{.kind = ErrorKind::kNotSupported, .message = "test transform function"}); + } +}; + +} // namespace + +TEST(SortOrderTest, Basics) { + { + SchemaField field1(5, "ts", std::make_shared(), true); + SchemaField field2(7, "bar", std::make_shared(), true); + + auto identity_transform = std::make_shared(); + SortField st_field1(5, identity_transform, SortDirection::kAscending, + NullOrder::kFirst); + SortField st_field2(7, identity_transform, SortDirection::kDescending, + NullOrder::kFirst); + SortOrder sort_order(100, {st_field1, st_field2}); + ASSERT_EQ(sort_order, sort_order); + std::span fields = sort_order.fields(); + ASSERT_EQ(2, fields.size()); + ASSERT_EQ(st_field1, fields[0]); + ASSERT_EQ(st_field2, fields[1]); + auto sort_order_str = + "sort_order[order_id<100>,\n SortField(source_id=5, transform=identity, " + "sort_direction=asc, null_order=nulls-first)\n SortField(source_id=7, " + "transform=identity, " + "sort_direction=desc, null_order=nulls-first)\n]"; + EXPECT_EQ(sort_order_str, sort_order.ToString()); + EXPECT_EQ(sort_order_str, std::format("{}", sort_order)); + } +} + +TEST(SortOrderTest, Equality) { + SchemaField field1(5, "ts", std::make_shared(), true); + SchemaField field2(7, "bar", std::make_shared(), true); + auto test_transform = std::make_shared(); + auto identity_transform = std::make_shared(); + SortField st_field1(5, identity_transform, SortDirection::kAscending, + NullOrder::kFirst); + SortField st_field2(7, identity_transform, SortDirection::kDescending, + NullOrder::kFirst); 
+ SortField st_field3(7, test_transform, SortDirection::kAscending, NullOrder::kFirst); + SortOrder sort_order1(100, {st_field1, st_field2}); + SortOrder sort_order2(100, {st_field2, st_field3}); + SortOrder sort_order3(100, {st_field1, st_field3}); + SortOrder sort_order4(101, {st_field1, st_field2}); + SortOrder sort_order5(100, {st_field2, st_field1}); + + ASSERT_EQ(sort_order1, sort_order1); + ASSERT_NE(sort_order1, sort_order2); + ASSERT_NE(sort_order2, sort_order1); + ASSERT_NE(sort_order1, sort_order3); + ASSERT_NE(sort_order3, sort_order1); + ASSERT_NE(sort_order1, sort_order4); + ASSERT_NE(sort_order4, sort_order1); + ASSERT_NE(sort_order1, sort_order5); + ASSERT_NE(sort_order5, sort_order1); +} +} // namespace iceberg From 8d4b29b76151b184227aaba05d61116d7662fc9b Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Thu, 3 Apr 2025 08:11:54 +0800 Subject: [PATCH 2/2] change sort_direction to direction according to the spec Signed-off-by: Junwang Zhao --- src/iceberg/sort_field.cc | 13 ++++++------- src/iceberg/sort_field.h | 8 ++++---- test/sort_field_test.cc | 6 +++--- test/sort_order_test.cc | 9 +++++---- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/iceberg/sort_field.cc b/src/iceberg/sort_field.cc index baa6a29de..8b9188d53 100644 --- a/src/iceberg/sort_field.cc +++ b/src/iceberg/sort_field.cc @@ -54,10 +54,10 @@ constexpr std::string_view ToString(NullOrder null_order) { } // namespace SortField::SortField(int32_t source_id, std::shared_ptr transform, - SortDirection sort_direction, NullOrder null_order) + SortDirection direction, NullOrder null_order) : source_id_(source_id), transform_(std::move(transform)), - sort_direction_(sort_direction), + direction_(direction), null_order_(null_order) {} int32_t SortField::source_id() const { return source_id_; } @@ -66,20 +66,19 @@ std::shared_ptr const& SortField::transform() const { return transform_; } -SortDirection SortField::sort_direction() const { return sort_direction_; } 
+SortDirection SortField::direction() const { return direction_; } NullOrder SortField::null_order() const { return null_order_; } std::string SortField::ToString() const { return std::format( - "SortField(source_id={}, transform={}, sort_direction={}, null_order={})", - source_id_, *transform_, iceberg::ToString(sort_direction_), - iceberg::ToString(null_order_)); + "sort_field(source_id={}, transform={}, direction={}, null_order={})", source_id_, + *transform_, iceberg::ToString(direction_), iceberg::ToString(null_order_)); } bool SortField::Equals(const SortField& other) const { return source_id_ == other.source_id_ && *transform_ == *other.transform_ && - sort_direction_ == other.sort_direction_ && null_order_ == other.null_order_; + direction_ == other.direction_ && null_order_ == other.null_order_; } } // namespace iceberg diff --git a/src/iceberg/sort_field.h b/src/iceberg/sort_field.h index 45ac9eecd..c28b1b653 100644 --- a/src/iceberg/sort_field.h +++ b/src/iceberg/sort_field.h @@ -55,10 +55,10 @@ class ICEBERG_EXPORT SortField : public util::Formattable { /// \brief Construct a field. /// \param[in] source_id The source field ID. /// \param[in] transform The transform function. - /// \param[in] sort_direction The sort direction. + /// \param[in] direction The sort direction. /// \param[in] null_order The null order. SortField(int32_t source_id, std::shared_ptr transform, - SortDirection sort_direction, NullOrder null_order); + SortDirection direction, NullOrder null_order); /// \brief Get the source field ID. int32_t source_id() const; @@ -67,7 +67,7 @@ class ICEBERG_EXPORT SortField : public util::Formattable { const std::shared_ptr& transform() const; /// \brief Get the sort direction. - SortDirection sort_direction() const; + SortDirection direction() const; /// \brief Get the null order. 
NullOrder null_order() const; @@ -88,7 +88,7 @@ class ICEBERG_EXPORT SortField : public util::Formattable { int32_t source_id_; std::shared_ptr transform_; - SortDirection sort_direction_; + SortDirection direction_; NullOrder null_order_; }; diff --git a/test/sort_field_test.cc b/test/sort_field_test.cc index 2bac894c3..2141a3db1 100644 --- a/test/sort_field_test.cc +++ b/test/sort_field_test.cc @@ -46,14 +46,14 @@ TEST(SortFieldTest, Basics) { SortField field(1, transform, SortDirection::kAscending, NullOrder::kFirst); EXPECT_EQ(1, field.source_id()); EXPECT_EQ(*transform, *field.transform()); - EXPECT_EQ(SortDirection::kAscending, field.sort_direction()); + EXPECT_EQ(SortDirection::kAscending, field.direction()); EXPECT_EQ(NullOrder::kFirst, field.null_order()); EXPECT_EQ( - "SortField(source_id=1, transform=identity, sort_direction=asc, " + "sort_field(source_id=1, transform=identity, direction=asc, " "null_order=nulls-first)", field.ToString()); EXPECT_EQ( - "SortField(source_id=1, transform=identity, sort_direction=asc, " + "sort_field(source_id=1, transform=identity, direction=asc, " "null_order=nulls-first)", std::format("{}", field)); } diff --git a/test/sort_order_test.cc b/test/sort_order_test.cc index abbb8779c..4f12e5f2f 100644 --- a/test/sort_order_test.cc +++ b/test/sort_order_test.cc @@ -60,10 +60,11 @@ TEST(SortOrderTest, Basics) { ASSERT_EQ(st_field1, fields[0]); ASSERT_EQ(st_field2, fields[1]); auto sort_order_str = - "sort_order[order_id<100>,\n SortField(source_id=5, transform=identity, " - "sort_direction=asc, null_order=nulls-first)\n SortField(source_id=7, " - "transform=identity, " - "sort_direction=desc, null_order=nulls-first)\n]"; + "sort_order[order_id<100>,\n" + " sort_field(source_id=5, transform=identity, direction=asc, " + "null_order=nulls-first)\n" + " sort_field(source_id=7, transform=identity, direction=desc, " + "null_order=nulls-first)\n]"; EXPECT_EQ(sort_order_str, sort_order.ToString()); EXPECT_EQ(sort_order_str, 
std::format("{}", sort_order)); }