Skip to content

Commit 91e3a72

Browse files
committed
feat: sort order
Signed-off-by: Junwang Zhao <[email protected]>
1 parent d54d079 commit 91e3a72

File tree

8 files changed

+478
-1
lines changed

8 files changed

+478
-1
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ set(ICEBERG_SOURCES
2525
schema_internal.cc
2626
partition_field.cc
2727
partition_spec.cc
28+
sort_field.cc
29+
sort_order.cc
2830
transform.cc
2931
type.cc)
3032

src/iceberg/sort_field.cc

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/sort_field.h"
21+
22+
#include <format>
23+
24+
#include "iceberg/transform.h"
25+
#include "iceberg/type.h"
26+
#include "iceberg/util/formatter.h"
27+
28+
namespace iceberg {
29+
30+
namespace {
31+
/// \brief Get the relative sort direction name
32+
constexpr std::string_view ToString(SortDirection direction) {
33+
switch (direction) {
34+
case SortDirection::kAscending:
35+
return "asc";
36+
case SortDirection::kDescending:
37+
return "desc";
38+
default:
39+
return "invalid";
40+
}
41+
}
42+
43+
/// \brief Get the relative null order name
44+
constexpr std::string_view ToString(NullOrder null_order) {
45+
switch (null_order) {
46+
case NullOrder::kFirst:
47+
return "nulls-first";
48+
case NullOrder::kLast:
49+
return "nulls-last";
50+
default:
51+
return "invalid";
52+
}
53+
}
54+
} // namespace
55+
56+
SortField::SortField(int32_t source_id, std::shared_ptr<TransformFunction> transform,
57+
SortDirection sort_direction, NullOrder null_order)
58+
: source_id_(source_id),
59+
transform_(std::move(transform)),
60+
sort_direction_(sort_direction),
61+
null_order_(null_order) {}
62+
63+
/// \brief Return the stored source field ID.
int32_t SortField::source_id() const {
  return source_id_;
}
64+
65+
std::shared_ptr<TransformFunction> const& SortField::transform() const {
66+
return transform_;
67+
}
68+
69+
/// \brief Return the stored sort direction.
SortDirection SortField::sort_direction() const {
  return sort_direction_;
}
70+
71+
/// \brief Return the stored null order.
NullOrder SortField::null_order() const {
  return null_order_;
}
72+
73+
/// \brief Render this field as a single-line string.
///
/// The output lists the source ID, the formatted transform, and the textual names of
/// the sort direction and null order produced by the file-local ToString helpers.
std::string SortField::ToString() const {
  const std::string_view direction_name = iceberg::ToString(sort_direction_);
  const std::string_view null_order_name = iceberg::ToString(null_order_);
  return std::format(
      "SortField(source_id={}, transform={}, sort_direction={}, null_order={})",
      source_id_, *transform_, direction_name, null_order_name);
}
79+
80+
bool SortField::Equals(const SortField& other) const {
81+
return source_id_ == other.source_id_ && *transform_ == *other.transform_ &&
82+
sort_direction_ == other.sort_direction_ && null_order_ == other.null_order_;
83+
}
84+
85+
} // namespace iceberg

src/iceberg/sort_field.h

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/sort_field.h
23+
/// A sort field in a sort order
24+
25+
#include <cstdint>
26+
#include <memory>
27+
#include <string>
28+
#include <string_view>
29+
#include <vector>
30+
31+
#include "iceberg/iceberg_export.h"
32+
#include "iceberg/type_fwd.h"
33+
#include "iceberg/util/formattable.h"
34+
35+
namespace iceberg {
36+
37+
/// \brief Direction in which a sort field orders its values.
enum class SortDirection {
  kAscending,   ///< Ascending
  kDescending,  ///< Descending
};
44+
45+
/// \brief Where null values are placed when a sort field is ordered.
enum class NullOrder {
  kFirst,  ///< Nulls are sorted first
  kLast,   ///< Nulls are sorted last
};
51+
52+
/// \brief a field with its transform.
53+
class ICEBERG_EXPORT SortField : public util::Formattable {
54+
public:
55+
/// \brief Construct a field.
56+
/// \param[in] source_id The source field ID.
57+
/// \param[in] transform The transform function.
58+
/// \param[in] sort_direction The sort direction.
59+
/// \param[in] null_order The null order.
60+
SortField(int32_t source_id, std::shared_ptr<TransformFunction> transform,
61+
SortDirection sort_direction, NullOrder null_order);
62+
63+
/// \brief Get the source field ID.
64+
int32_t source_id() const;
65+
66+
/// \brief Get the transform type.
67+
const std::shared_ptr<TransformFunction>& transform() const;
68+
69+
/// \brief Get the sort direction.
70+
SortDirection sort_direction() const;
71+
72+
/// \brief Get the null order.
73+
NullOrder null_order() const;
74+
75+
std::string ToString() const override;
76+
77+
friend bool operator==(const SortField& lhs, const SortField& rhs) {
78+
return lhs.Equals(rhs);
79+
}
80+
81+
friend bool operator!=(const SortField& lhs, const SortField& rhs) {
82+
return !(lhs == rhs);
83+
}
84+
85+
private:
86+
/// \brief Compare two fields for equality.
87+
[[nodiscard]] bool Equals(const SortField& other) const;
88+
89+
int32_t source_id_;
90+
std::shared_ptr<TransformFunction> transform_;
91+
SortDirection sort_direction_;
92+
NullOrder null_order_;
93+
};
94+
95+
} // namespace iceberg

src/iceberg/sort_order.cc

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/sort_order.h"
21+
22+
#include <format>
23+
24+
#include "iceberg/util/formatter.h"
25+
26+
namespace iceberg {
27+
28+
SortOrder::SortOrder(int64_t order_id, std::vector<SortField> fields)
29+
: order_id_(order_id), fields_(std::move(fields)) {}
30+
31+
/// \brief Return the stored sort order id.
int64_t SortOrder::order_id() const {
  return order_id_;
}
32+
33+
/// \brief Return a read-only view over the stored sort fields.
///
/// The span refers to this object's member vector; it does not copy the fields.
std::span<const SortField> SortOrder::fields() const {
  return std::span<const SortField>(fields_);
}
34+
35+
/// \brief Render this sort order as a multi-line string.
///
/// The first line carries the order id, each following line holds one formatted sort
/// field in list order, and a closing bracket ends the text.
std::string SortOrder::ToString() const {
  std::string text;
  auto sink = std::back_inserter(text);
  sink = std::format_to(sink, "sort_order[order_id<{}>,\n", order_id_);
  for (const SortField& entry : fields_) {
    sink = std::format_to(sink, " {}\n", entry);
  }
  text.push_back(']');
  return text;
}
43+
44+
bool SortOrder::Equals(const SortOrder& other) const {
45+
return order_id_ == other.order_id_ && fields_ == other.fields_;
46+
}
47+
48+
} // namespace iceberg

src/iceberg/sort_order.h

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <cstdint>
23+
#include <span>
24+
#include <vector>
25+
26+
#include "iceberg/iceberg_export.h"
27+
#include "iceberg/sort_field.h"
28+
#include "iceberg/util/formattable.h"
29+
30+
namespace iceberg {
31+
32+
/// \brief A sort order for a table
33+
///
34+
/// A sort order is defined by a sort order id and a list of sort fields.
35+
/// The order of the sort fields within the list defines the order in which the sort is
36+
/// applied to the data.
37+
class ICEBERG_EXPORT SortOrder : public util::Formattable {
38+
public:
39+
SortOrder(int64_t order_id, std::vector<SortField> fields);
40+
41+
/// \brief Get the sort order id.
42+
int64_t order_id() const;
43+
44+
/// \brief Get the list of sort fields.
45+
std::span<const SortField> fields() const;
46+
47+
std::string ToString() const override;
48+
49+
friend bool operator==(const SortOrder& lhs, const SortOrder& rhs) {
50+
return lhs.Equals(rhs);
51+
}
52+
53+
friend bool operator!=(const SortOrder& lhs, const SortOrder& rhs) {
54+
return !(lhs == rhs);
55+
}
56+
57+
private:
58+
/// \brief Compare two sort orders for equality.
59+
bool Equals(const SortOrder& other) const;
60+
61+
int64_t order_id_;
62+
std::vector<SortField> fields_;
63+
};
64+
65+
} // namespace iceberg

test/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ target_sources(schema_test
3030
type_test.cc
3131
transform_test.cc
3232
partition_field_test.cc
33-
partition_spec_test.cc)
33+
partition_spec_test.cc
34+
sort_field_test.cc
35+
sort_order_test.cc)
3436
target_link_libraries(schema_test PRIVATE iceberg_static GTest::gtest_main GTest::gmock)
3537
add_test(NAME schema_test COMMAND schema_test)
3638

test/sort_field_test.cc

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/sort_field.h"
21+
22+
#include <format>
23+
24+
#include <gtest/gtest.h>
25+
26+
#include "iceberg/transform.h"
27+
#include "iceberg/util/formatter.h"
28+
29+
namespace iceberg {
30+
31+
namespace {
32+
class TestTransformFunction : public TransformFunction {
33+
public:
34+
TestTransformFunction() : TransformFunction(TransformType::kUnknown) {}
35+
expected<ArrowArray, Error> Transform(const ArrowArray& input) override {
36+
return unexpected(
37+
Error{.kind = ErrorKind::kNotSupported, .message = "test transform function"});
38+
}
39+
};
40+
41+
} // namespace
42+
43+
// Accessors must return the constructor arguments, and ToString() and std::format
// must both produce the same single-line representation.
TEST(SortFieldTest, Basics) {
  const std::string expected_repr =
      "SortField(source_id=1, transform=identity, sort_direction=asc, "
      "null_order=nulls-first)";

  const auto transform = std::make_shared<IdentityTransformFunction>();
  SortField field(1, transform, SortDirection::kAscending, NullOrder::kFirst);

  EXPECT_EQ(1, field.source_id());
  EXPECT_EQ(*transform, *field.transform());
  EXPECT_EQ(SortDirection::kAscending, field.sort_direction());
  EXPECT_EQ(NullOrder::kFirst, field.null_order());
  EXPECT_EQ(expected_repr, field.ToString());
  EXPECT_EQ(expected_repr, std::format("{}", field));
}
61+
62+
// A field equals itself, and differs from any field that changes exactly one of the
// four components. Inequality is checked in both argument orders.
TEST(SortFieldTest, Equality) {
  auto test_transform = std::make_shared<TestTransformFunction>();
  auto identity_transform = std::make_shared<IdentityTransformFunction>();

  const SortField base(1, test_transform, SortDirection::kAscending, NullOrder::kFirst);
  const SortField variants[] = {
      // different source id
      SortField(2, test_transform, SortDirection::kAscending, NullOrder::kFirst),
      // different transform
      SortField(1, identity_transform, SortDirection::kAscending, NullOrder::kFirst),
      // different sort direction
      SortField(1, test_transform, SortDirection::kDescending, NullOrder::kFirst),
      // different null order
      SortField(1, test_transform, SortDirection::kAscending, NullOrder::kLast),
  };

  ASSERT_EQ(base, base);
  for (const SortField& variant : variants) {
    ASSERT_NE(base, variant);
    ASSERT_NE(variant, base);
  }
}
82+
} // namespace iceberg

0 commit comments

Comments
 (0)