Skip to content

Commit 1c431b6

Browse files
authored
feat: add satisfies order for SortField/SortOrder and Transform (#284)
This PR also makes the `ToString` consistent with Java implementation.
1 parent 0dbb593 commit 1c431b6

File tree

9 files changed

+336
-20
lines changed

9 files changed

+336
-20
lines changed

src/iceberg/sort_field.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,18 @@ SortDirection SortField::direction() const { return direction_; }
4141

4242
NullOrder SortField::null_order() const { return null_order_; }
4343

44+
// Checks whether this field's order satisfies `other`'s order.
// Fields that compare equal satisfy each other. Otherwise the source id,
// direction and null order must all match, and the answer is whatever this
// field's transform reports via SatisfiesOrderOf for the other field's transform.
bool SortField::Satisfies(const SortField& other) const {
45+
if (*this == other) {
46+
return true;
47+
} else if (source_id_ != other.source_id() || direction_ != other.direction() ||
48+
null_order_ != other.null_order()) {
49+
return false;
50+
}
51+
// Same source, direction and null order: only the transforms can still differ.
return transform_->SatisfiesOrderOf(*other.transform());
52+
}
53+
4454
std::string SortField::ToString() const {
45-
return std::format(
46-
"sort_field(source_id={}, transform={}, direction={}, null_order={})", source_id_,
47-
*transform_, direction_, null_order_);
55+
return std::format("{}({}) {} {}", *transform_, source_id_, direction_, null_order_);
4856
}
4957

5058
bool SortField::Equals(const SortField& other) const {

src/iceberg/sort_field.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,15 @@ class ICEBERG_EXPORT SortField : public util::Formattable {
107107
/// \brief Get the null order.
108108
NullOrder null_order() const;
109109

110+
/// \brief Checks whether this field's order satisfies another field's order.
111+
bool Satisfies(const SortField& other) const;
112+
110113
std::string ToString() const override;
111114

112115
/// \brief Equality comparison: the same object compares equal without calling
/// Equals; any other pair is decided by lhs.Equals(rhs).
friend bool operator==(const SortField& lhs, const SortField& rhs) {
116+
if (&lhs == &rhs) {
117+
return true;
118+
}
113119
return lhs.Equals(rhs);
114120
}
115121

src/iceberg/sort_order.cc

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "iceberg/sort_order.h"
2121

2222
#include <format>
23+
#include <ranges>
2324

2425
#include "iceberg/util/formatter.h" // IWYU pragma: keep
2526

@@ -38,10 +39,38 @@ int32_t SortOrder::order_id() const { return order_id_; }
3839

3940
std::span<const SortField> SortOrder::fields() const { return fields_; }
4041

42+
// Checks whether this sort order satisfies `other`.
// Returns true when `other` is unsorted, false when `other` has more fields
// than this order, and otherwise true only if every field of this order
// satisfies the field of `other` at the same position.
bool SortOrder::Satisfies(const SortOrder& other) const {
43+
// any ordering satisfies an unsorted ordering
44+
if (other.is_unsorted()) {
45+
return true;
46+
}
47+
48+
// this ordering cannot satisfy an ordering with more sort fields
49+
if (fields_.size() < other.fields().size()) {
50+
return false;
51+
}
52+
53+
// this ordering has either more or the same number of sort fields
// zip pairs the fields positionally and stops at the shorter range, so only
// the leading fields of this order are compared against other's fields
54+
for (const auto& [field, other_field] : std::views::zip(fields_, other.fields_)) {
55+
if (!field.Satisfies(other_field)) {
56+
return false;
57+
}
58+
}
59+
60+
return true;
61+
}
62+
63+
// Compares only the sort field lists of the two orders; order_id_ is not
// consulted, so orders with different ids but equal fields return true.
bool SortOrder::SameOrder(const SortOrder& other) const {
64+
return fields_ == other.fields_;
65+
}
66+
4167
std::string SortOrder::ToString() const {
42-
std::string repr = std::format("sort_order[order_id<{}>,\n", order_id_);
68+
std::string repr = "[";
4369
for (const auto& field : fields_) {
44-
std::format_to(std::back_inserter(repr), " {}\n", field);
70+
std::format_to(std::back_inserter(repr), "\n {}", field);
71+
}
72+
if (!fields_.empty()) {
73+
repr.push_back('\n');
4574
}
4675
repr += "]";
4776
return repr;

src/iceberg/sort_order.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,20 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
4949
/// \brief Get the list of sort fields.
5050
std::span<const SortField> fields() const;
5151

52+
/// \brief Returns true if the sort order is sorted
53+
bool is_sorted() const { return !fields_.empty(); }
54+
55+
/// \brief Returns true if the sort order is unsorted
56+
/// A SortOrder is unsorted if it has no sort fields.
57+
bool is_unsorted() const { return fields_.empty(); }
58+
59+
/// \brief Checks whether this order satisfies another order.
60+
bool Satisfies(const SortOrder& other) const;
61+
62+
/// \brief Checks whether this order is equivalent to another order while ignoring the
63+
/// order id.
64+
bool SameOrder(const SortOrder& other) const;
65+
5266
std::string ToString() const override;
5367

5468
friend bool operator==(const SortOrder& lhs, const SortOrder& rhs) {

src/iceberg/test/sort_field_test.cc

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,8 @@ TEST(SortFieldTest, Basics) {
3636
EXPECT_EQ(*transform, *field.transform());
3737
EXPECT_EQ(SortDirection::kAscending, field.direction());
3838
EXPECT_EQ(NullOrder::kFirst, field.null_order());
39-
EXPECT_EQ(
40-
"sort_field(source_id=1, transform=identity, direction=asc, "
41-
"null_order=nulls-first)",
42-
field.ToString());
43-
EXPECT_EQ(
44-
"sort_field(source_id=1, transform=identity, direction=asc, "
45-
"null_order=nulls-first)",
46-
std::format("{}", field));
39+
EXPECT_EQ(field.ToString(), "identity(1) asc nulls-first");
40+
EXPECT_EQ(std::format("{}", field), "identity(1) asc nulls-first");
4741
}
4842
}
4943

@@ -67,4 +61,23 @@ TEST(SortFieldTest, Equality) {
6761
ASSERT_NE(field1, field5);
6862
ASSERT_NE(field5, field1);
6963
}
64+
65+
// Verifies SortField::Satisfies: field1 is compared against an identically
// constructed field and against fields that each differ from it in exactly one
// constructor argument (transform, direction, null order, source id).
TEST(SortFieldTest, Satisfies) {
66+
const auto bucket_transform = Transform::Bucket(8);
67+
const auto identity_transform = Transform::Identity();
68+
69+
SortField field1(1, bucket_transform, SortDirection::kAscending, NullOrder::kFirst);
70+
SortField field2(1, bucket_transform, SortDirection::kAscending, NullOrder::kFirst);
71+
SortField field3(1, identity_transform, SortDirection::kAscending, NullOrder::kFirst);
72+
SortField field4(1, bucket_transform, SortDirection::kDescending, NullOrder::kFirst);
73+
SortField field5(1, bucket_transform, SortDirection::kAscending, NullOrder::kLast);
74+
SortField field6(2, bucket_transform, SortDirection::kAscending, NullOrder::kFirst);
75+
76+
EXPECT_TRUE(field1.Satisfies(field2)); // Same fields
77+
EXPECT_FALSE(field1.Satisfies(field3)); // Different transform
78+
EXPECT_FALSE(field1.Satisfies(field4)); // Different direction
79+
EXPECT_FALSE(field1.Satisfies(field5)); // Different null order
80+
EXPECT_FALSE(field1.Satisfies(field6)); // Different source_id
81+
}
82+
7083
} // namespace iceberg

src/iceberg/test/sort_order_test.cc

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,12 @@ TEST(SortOrderTest, Basics) {
4848
ASSERT_EQ(st_field1, fields[0]);
4949
ASSERT_EQ(st_field2, fields[1]);
5050
auto sort_order_str =
51-
"sort_order[order_id<100>,\n"
52-
" sort_field(source_id=5, transform=identity, direction=asc, "
53-
"null_order=nulls-first)\n"
54-
" sort_field(source_id=7, transform=identity, direction=desc, "
55-
"null_order=nulls-first)\n]";
56-
EXPECT_EQ(sort_order_str, sort_order.ToString());
57-
EXPECT_EQ(sort_order_str, std::format("{}", sort_order));
51+
"[\n"
52+
" identity(5) asc nulls-first\n"
53+
" identity(7) desc nulls-first\n"
54+
"]";
55+
EXPECT_EQ(sort_order.ToString(), sort_order_str);
56+
EXPECT_EQ(std::format("{}", sort_order), sort_order_str);
5857
}
5958
}
6059

@@ -84,4 +83,69 @@ TEST(SortOrderTest, Equality) {
8483
ASSERT_NE(sort_order1, sort_order5);
8584
ASSERT_NE(sort_order5, sort_order1);
8685
}
86+
87+
// The order returned by SortOrder::Unsorted() must report is_unsorted() and
// must not report is_sorted().
TEST(SortOrderTest, IsUnsorted) {
88+
auto unsorted = SortOrder::Unsorted();
89+
EXPECT_TRUE(unsorted->is_unsorted());
90+
EXPECT_FALSE(unsorted->is_sorted());
91+
}
92+
93+
// A SortOrder built with one sort field must report is_sorted() and must not
// report is_unsorted().
// NOTE(review): the SchemaField local `field1` is not referenced by any later
// statement in this test.
TEST(SortOrderTest, IsSorted) {
94+
SchemaField field1(5, "ts", iceberg::timestamp(), true);
95+
auto identity_transform = Transform::Identity();
96+
SortField st_field1(5, identity_transform, SortDirection::kAscending,
97+
NullOrder::kFirst);
98+
SortOrder sorted_order(100, {st_field1});
99+
100+
EXPECT_TRUE(sorted_order.is_sorted());
101+
EXPECT_FALSE(sorted_order.is_unsorted());
102+
}
103+
104+
// Verifies SortOrder::Satisfies across the unsorted order and four sorted
// orders: two-field orders (sort_order1, sort_order3) that share their first
// field, and one-field orders (sort_order2, sort_order4) on different fields.
// NOTE(review): the SchemaField locals `field1` and `field2` are not referenced
// by any later statement in this test.
TEST(SortOrderTest, Satisfies) {
105+
SchemaField field1(5, "ts", iceberg::timestamp(), true);
106+
SchemaField field2(7, "bar", iceberg::string(), true);
107+
auto identity_transform = Transform::Identity();
108+
auto bucket_transform = Transform::Bucket(8);
109+
110+
SortField st_field1(5, identity_transform, SortDirection::kAscending,
111+
NullOrder::kFirst);
112+
SortField st_field2(7, identity_transform, SortDirection::kDescending,
113+
NullOrder::kFirst);
114+
SortField st_field3(7, bucket_transform, SortDirection::kAscending, NullOrder::kFirst);
115+
116+
SortOrder sort_order1(100, {st_field1, st_field2});
117+
SortOrder sort_order2(101, {st_field1});
118+
SortOrder sort_order3(102, {st_field1, st_field3});
119+
SortOrder sort_order4(104, {st_field2});
120+
auto unsorted = SortOrder::Unsorted();
121+
122+
// Any order satisfies an unsorted order, including unsorted itself
123+
EXPECT_TRUE(unsorted->Satisfies(*unsorted));
124+
EXPECT_TRUE(sort_order1.Satisfies(*unsorted));
125+
EXPECT_TRUE(sort_order2.Satisfies(*unsorted));
126+
EXPECT_TRUE(sort_order3.Satisfies(*unsorted));
127+
128+
// Unsorted does not satisfy any sorted order
129+
EXPECT_FALSE(unsorted->Satisfies(sort_order1));
130+
EXPECT_FALSE(unsorted->Satisfies(sort_order2));
131+
EXPECT_FALSE(unsorted->Satisfies(sort_order3));
132+
133+
// A sort order satisfies itself
134+
EXPECT_TRUE(sort_order1.Satisfies(sort_order1));
135+
EXPECT_TRUE(sort_order2.Satisfies(sort_order2));
136+
EXPECT_TRUE(sort_order3.Satisfies(sort_order3));
137+
138+
// A sort order with more fields satisfies one with fewer fields
139+
EXPECT_TRUE(sort_order1.Satisfies(sort_order2));
140+
EXPECT_TRUE(sort_order3.Satisfies(sort_order2));
141+
142+
// A sort order does not satisfy one with more fields
143+
EXPECT_FALSE(sort_order2.Satisfies(sort_order1));
144+
EXPECT_FALSE(sort_order2.Satisfies(sort_order3));
145+
146+
// A sort order does not satisfy one with different fields
147+
EXPECT_FALSE(sort_order4.Satisfies(sort_order2));
148+
EXPECT_FALSE(sort_order2.Satisfies(sort_order4));
149+
}
150+
87151
} // namespace iceberg

src/iceberg/test/transform_test.cc

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,4 +720,125 @@ INSTANTIATE_TEST_SUITE_P(
720720
.source = Literal::Null(iceberg::string()),
721721
.expected = Literal::Null(iceberg::string())}));
722722

723+
// Table-driven check of Transform::PreservesOrder: each case parses a
// transform from its string form with TransformFromString and compares the
// result of PreservesOrder() with the expected flag.
TEST(TransformPreservesOrderTest, PreservesOrder) {
724+
struct Case {
725+
std::string transform_str;
726+
bool expected;
727+
};
728+
729+
const std::vector<Case> cases = {
730+
{.transform_str = "identity", .expected = true},
731+
{.transform_str = "year", .expected = true},
732+
{.transform_str = "month", .expected = true},
733+
{.transform_str = "day", .expected = true},
734+
{.transform_str = "hour", .expected = true},
735+
{.transform_str = "void", .expected = false},
736+
{.transform_str = "bucket[16]", .expected = false},
737+
{.transform_str = "truncate[32]", .expected = true},
738+
};
739+
740+
for (const auto& c : cases) {
741+
auto transform = TransformFromString(c.transform_str);
742+
// Abort the test if parsing fails; the check below dereferences the result.
ASSERT_TRUE(transform.has_value()) << "Failed to parse: " << c.transform_str;
743+
744+
EXPECT_EQ(transform.value()->PreservesOrder(), c.expected)
745+
<< "Unexpected result for transform: " << c.transform_str;
746+
}
747+
}
748+
749+
// Table-driven check of Transform::SatisfiesOrderOf: each case parses two
// transforms from their string forms with TransformFromString and compares
// transform->SatisfiesOrderOf(*other_transform) with the expected flag.
TEST(TransformSatisfiesOrderOfTest, SatisfiesOrderOf) {
750+
struct Case {
751+
std::string transform_str;
752+
std::string other_transform_str;
753+
bool expected;
754+
};
755+
756+
const std::vector<Case> cases = {
757+
// Identity satisfies all order-preserving transforms
758+
{.transform_str = "identity", .other_transform_str = "identity", .expected = true},
759+
{.transform_str = "identity", .other_transform_str = "year", .expected = true},
760+
{.transform_str = "identity", .other_transform_str = "month", .expected = true},
761+
{.transform_str = "identity", .other_transform_str = "day", .expected = true},
762+
{.transform_str = "identity", .other_transform_str = "hour", .expected = true},
763+
{.transform_str = "identity",
764+
.other_transform_str = "truncate[32]",
765+
.expected = true},
766+
{.transform_str = "identity",
767+
.other_transform_str = "bucket[16]",
768+
.expected = false},
769+
770+
// Truncate satisfies Truncate with the same or a smaller width
771+
{.transform_str = "truncate[32]",
772+
.other_transform_str = "truncate[16]",
773+
.expected = true},
774+
{.transform_str = "truncate[16]",
775+
.other_transform_str = "truncate[16]",
776+
.expected = true},
777+
{.transform_str = "truncate[16]",
778+
.other_transform_str = "truncate[32]",
779+
.expected = false},
780+
{.transform_str = "truncate[16]",
781+
.other_transform_str = "bucket[32]",
782+
.expected = false},
783+
784+
// Hour satisfies hour, day, month, and year
785+
{.transform_str = "hour", .other_transform_str = "hour", .expected = true},
786+
{.transform_str = "hour", .other_transform_str = "day", .expected = true},
787+
{.transform_str = "hour", .other_transform_str = "month", .expected = true},
788+
{.transform_str = "hour", .other_transform_str = "year", .expected = true},
789+
{.transform_str = "hour", .other_transform_str = "identity", .expected = false},
790+
{.transform_str = "hour", .other_transform_str = "bucket[16]", .expected = false},
791+
792+
// Day satisfies day, month, and year
793+
{.transform_str = "day", .other_transform_str = "day", .expected = true},
794+
{.transform_str = "day", .other_transform_str = "month", .expected = true},
795+
{.transform_str = "day", .other_transform_str = "year", .expected = true},
796+
{.transform_str = "day", .other_transform_str = "hour", .expected = false},
797+
{.transform_str = "day", .other_transform_str = "identity", .expected = false},
798+
799+
// Month satisfies month and year
800+
{.transform_str = "month", .other_transform_str = "month", .expected = true},
801+
{.transform_str = "month", .other_transform_str = "year", .expected = true},
802+
{.transform_str = "month", .other_transform_str = "day", .expected = false},
803+
{.transform_str = "month", .other_transform_str = "hour", .expected = false},
804+
805+
// Year satisfies only year
806+
{.transform_str = "year", .other_transform_str = "year", .expected = true},
807+
{.transform_str = "year", .other_transform_str = "month", .expected = false},
808+
{.transform_str = "year", .other_transform_str = "day", .expected = false},
809+
{.transform_str = "year", .other_transform_str = "hour", .expected = false},
810+
811+
// Void satisfies no order-preserving transforms
812+
{.transform_str = "void", .other_transform_str = "identity", .expected = false},
813+
{.transform_str = "void", .other_transform_str = "year", .expected = false},
814+
{.transform_str = "void", .other_transform_str = "month", .expected = false},
815+
{.transform_str = "void", .other_transform_str = "day", .expected = false},
816+
{.transform_str = "void", .other_transform_str = "hour", .expected = false},
817+
818+
// Bucket satisfies only itself
819+
{.transform_str = "bucket[16]",
820+
.other_transform_str = "bucket[16]",
821+
.expected = true},
822+
{.transform_str = "bucket[16]",
823+
.other_transform_str = "bucket[32]",
824+
.expected = false},
825+
{.transform_str = "bucket[16]",
826+
.other_transform_str = "identity",
827+
.expected = false},
828+
};
829+
830+
for (const auto& c : cases) {
831+
auto transform = TransformFromString(c.transform_str);
832+
auto other_transform = TransformFromString(c.other_transform_str);
833+
834+
// Both parses must succeed before the results are dereferenced below.
ASSERT_TRUE(transform.has_value()) << "Failed to parse: " << c.transform_str;
835+
ASSERT_TRUE(other_transform.has_value())
836+
<< "Failed to parse: " << c.other_transform_str;
837+
838+
EXPECT_EQ(transform.value()->SatisfiesOrderOf(*other_transform.value()), c.expected)
839+
<< "Unexpected result for transform: " << c.transform_str
840+
<< " and other transform: " << c.other_transform_str;
841+
}
842+
}
843+
723844
} // namespace iceberg

0 commit comments

Comments
 (0)