Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/iceberg/result.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ enum class ErrorKind {
kNotFound,
kNotImplemented,
kNotSupported,
kValidationError,
kUnknownError,
};

Expand Down Expand Up @@ -97,6 +98,7 @@ DEFINE_ERROR_FUNCTION(NotAllowed)
DEFINE_ERROR_FUNCTION(NotFound)
DEFINE_ERROR_FUNCTION(NotImplemented)
DEFINE_ERROR_FUNCTION(NotSupported)
DEFINE_ERROR_FUNCTION(ValidationError)
DEFINE_ERROR_FUNCTION(UnknownError)

#undef DEFINE_ERROR_FUNCTION
Expand Down
121 changes: 120 additions & 1 deletion src/iceberg/sort_order.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,18 @@
#include "iceberg/sort_order.h"

#include <format>
#include <memory>
#include <optional>
#include <ranges>

#include "iceberg/exception.h"
#include "iceberg/expression/term.h"
#include "iceberg/result.h"
#include "iceberg/schema.h"
#include "iceberg/sort_field.h"
#include "iceberg/transform.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/macros.h"

namespace iceberg {

Expand All @@ -31,7 +40,7 @@ SortOrder::SortOrder(int32_t order_id, std::vector<SortField> fields)

const std::shared_ptr<SortOrder>& SortOrder::Unsorted() {
static const std::shared_ptr<SortOrder> unsorted =
std::make_shared<SortOrder>(/*order_id=*/0, std::vector<SortField>{});
std::make_shared<SortOrder>(kUnsortedOrderId, std::vector<SortField>{});
return unsorted;
}

Expand Down Expand Up @@ -80,4 +89,114 @@ bool SortOrder::Equals(const SortOrder& other) const {
return order_id_ == other.order_id_ && fields_ == other.fields_;
}

// SortOrderBuilder implementation

/// \brief Private state of SortOrderBuilder (the header only forward-declares it).
struct SortOrderBuilder::Impl {
  /// \brief Remember the schema that terms are bound against; all other state
  /// starts out empty/unset.
  explicit Impl(const Schema* bound_schema) : schema{bound_schema} {}

  /// Schema used to bind terms and to validate the built order. Stored as a raw
  /// pointer, i.e. not owned by the builder.
  const Schema* schema;
  /// Order ID requested through WithOrderId(); empty until that is called.
  std::optional<int32_t> sort_id;
  /// Sort fields in the order they were added.
  std::vector<SortField> fields;
  /// Flag forwarded to Bind() when resolving terms by name.
  /// NOTE(review): the Java reference builder appears to default this to true;
  /// confirm that defaulting to false here is intentional.
  bool case_sensitive = false;
};

// The destructor and move operations are declared in the header but defined
// here, after Impl has become a complete type: std::unique_ptr<Impl> needs the
// full definition of Impl in order to destroy it.
SortOrderBuilder::~SortOrderBuilder() = default;

// Moving transfers ownership of the Impl; the moved-from builder is left with a
// null impl_, so none of the methods that dereference impl_ may be called on it.
SortOrderBuilder::SortOrderBuilder(SortOrderBuilder&&) noexcept = default;

SortOrderBuilder& SortOrderBuilder::operator=(SortOrderBuilder&&) noexcept = default;

// Private constructor used by BuildFromSchema(): allocates the Impl and records
// the schema pointer in it (the pointer is stored as-is, not copied or owned).
SortOrderBuilder::SortOrderBuilder(const Schema* schema)
: impl_(std::make_unique<Impl>(schema)) {}

/// \brief Factory for a builder whose terms are resolved against `schema`.
///
/// \param schema Schema pointer handed to the builder; it is stored as-is.
/// \return A heap-allocated builder owned by the caller.
std::unique_ptr<SortOrderBuilder> SortOrderBuilder::BuildFromSchema(
    const Schema* schema) {
  // The constructor is private, so std::make_unique cannot reach it; wrap a
  // direct `new` in the owning pointer immediately instead.
  std::unique_ptr<SortOrderBuilder> builder(new SortOrderBuilder(schema));  // NOLINT
  return builder;
}

/// \brief Record the order ID to use for the built SortOrder.
///
/// The value is only validated later, when the order is built.
///
/// \param sort_id The requested sort order ID.
/// \return This builder, for chaining.
SortOrderBuilder& SortOrderBuilder::WithOrderId(int32_t sort_id) {
  impl_->sort_id.emplace(sort_id);
  return *this;
}

/// \brief Choose the case-sensitivity flag passed to Bind() for terms added
/// after this call.
///
/// \param case_sensitive The flag value to store.
/// \return This builder, for chaining.
SortOrderBuilder& SortOrderBuilder::CaseSensitive(bool case_sensitive) {
  Impl& state = *impl_;
  state.case_sensitive = case_sensitive;
  return *this;
}

/// \brief Assemble a SortOrder from the collected state without checking it
/// against the schema.
///
/// Only the order-ID rules are enforced here:
///  - with no fields, the ID must be unset or kUnsortedOrderId;
///  - with fields, the ID must not be kUnsortedOrderId, and defaults to
///    kInitialSortOrderId when unset.
///
/// The collected fields are moved into the returned order.
///
/// \return The new SortOrder, or an InvalidArgument error when the requested ID
/// conflicts with the rules above.
Result<std::unique_ptr<SortOrder>> SortOrderBuilder::BuildUncheckd() {
  const std::optional<int32_t>& requested_id = impl_->sort_id;
  const bool id_is_unsorted =
      requested_id.has_value() && *requested_id == SortOrder::kUnsortedOrderId;
  const bool id_is_sorted =
      requested_id.has_value() && *requested_id != SortOrder::kUnsortedOrderId;

  const bool no_fields = impl_->fields.empty();
  if (no_fields && id_is_sorted) {
    return InvalidArgument("Unsorted order ID must be 0");
  }
  if (no_fields) {
    return std::make_unique<SortOrder>(SortOrder::kUnsortedOrderId,
                                       std::vector<SortField>{});
  }

  if (id_is_unsorted) {
    return InvalidArgument("Sort order ID 0 is reserved for unsorted order");
  }

  // ID 0 means "unsorted", so an order with fields falls back to the initial ID.
  const int32_t order_id = requested_id.value_or(SortOrder::kInitialSortOrderId);
  return std::make_unique<SortOrder>(order_id, std::move(impl_->fields));
}

/// \brief Build the SortOrder and validate it against the builder's schema.
///
/// Runs in two steps: BuildUncheckd() assembles the order and enforces the
/// order-ID rules, then CheckCompatibility() verifies every sort field against
/// the schema. Note that BuildUncheckd() moves the collected fields out of the
/// builder, so they are not restored if the compatibility check fails.
///
/// \return The validated SortOrder, or the first error reported by either step.
Result<std::unique_ptr<SortOrder>> SortOrderBuilder::Build() {
// The ICEBERG_* macros come from iceberg/util/macros.h (not shown here); they
// are expected to return early from this function when the wrapped call fails.
ICEBERG_ASSIGN_OR_RAISE(auto sort_order, BuildUncheckd());
ICEBERG_RETURN_UNEXPECTED(CheckCompatibility(sort_order, impl_->schema));
return sort_order;
}

/// \brief Append a sort field described directly by source field ID and transform.
///
/// Nothing is validated here; the field is checked against the schema when the
/// order is built.
///
/// \param source_id The source field ID.
/// \param transform The transform to apply to the source field.
/// \param direction The sort direction.
/// \param null_order The null ordering.
/// \return This builder, for chaining.
SortOrderBuilder& SortOrderBuilder::AddSortField(
    int32_t source_id, const std::shared_ptr<Transform>& transform,
    SortDirection direction, NullOrder null_order) {
  std::vector<SortField>& collected = impl_->fields;
  collected.emplace_back(source_id, transform, direction, null_order);
  return *this;
}

/// \brief Append a sort field described by an expression term.
///
/// The term is bound against the builder's schema right away, using the
/// case-sensitivity flag in effect at the time of the call:
///  - a NamedReference becomes a field with the identity transform;
///  - an UnboundTransform becomes a field with the bound transform.
///
/// Bind failures are reported through ICEBERG_CHECK; any other kind of term
/// (including a null pointer) throws IcebergError.
///
/// \param term The term to sort by.
/// \param direction The sort direction.
/// \param null_order The null ordering.
/// \return This builder, for chaining.
SortOrderBuilder& SortOrderBuilder::AddSortField(const std::shared_ptr<Term>& term,
                                                 SortDirection direction,
                                                 NullOrder null_order) {
  // Plain column reference: sort by the column itself.
  if (auto reference = std::dynamic_pointer_cast<NamedReference>(term)) {
    auto bound = reference->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound.has_value(), "Failed to bind named reference to schema.");
    int32_t field_id = bound.value()->field().field_id();
    impl_->fields.emplace_back(field_id, Transform::Identity(), direction, null_order);
    return *this;
  }

  // Transform over a column: sort by the transformed value.
  if (auto transform_term = std::dynamic_pointer_cast<UnboundTransform>(term)) {
    auto bound = transform_term->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound.has_value(), "Failed to bind unbound transform to schema.");
    int32_t field_id = bound.value()->reference()->field().field_id();
    impl_->fields.emplace_back(field_id, bound.value()->transform(), direction,
                               null_order);
    return *this;
  }

  // Neither supported kind of term (or a null pointer).
  throw IcebergError(std::format(
      "Invalid term: {}, expected either a named reference or an unbound transform",
      term ? term->ToString() : "null"));
}

/// \brief Verify that every field of `sort_order` can be applied to `schema`.
///
/// For each sort field, in order:
///  1. the source field ID must exist in the schema;
///  2. the source field's type must be primitive;
///  3. the field must carry a transform;
///  4. the transform must be able to produce a result type for the source type.
///
/// Null inputs are reported as errors instead of being dereferenced: a null
/// schema can reach this point when the builder was created from a null schema
/// and fields were added by ID, and a null transform can be passed to the
/// ID-based AddSortField() overload. An order without fields is always
/// compatible, even with a null schema.
///
/// \param sort_order The order to validate.
/// \param schema The schema to validate against.
/// \return An empty Status on success, otherwise the first error encountered.
Status SortOrderBuilder::CheckCompatibility(const std::unique_ptr<SortOrder>& sort_order,
                                            const Schema* schema) {
  if (sort_order == nullptr) {
    return InvalidArgument("Cannot check compatibility of a null sort order");
  }

  const auto& sort_fields = sort_order->fields();
  // Only require a schema when there is something to look up in it, so that
  // building an unsorted order keeps working without one.
  if (schema == nullptr && !sort_fields.empty()) {
    return InvalidArgument("Cannot validate sort fields without a schema");
  }

  for (const auto& field : sort_fields) {
    ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema->FindFieldById(field.source_id()));
    if (!schema_field.has_value()) {
      return ValidationError("Cannot find source column for sort field: {}", field);
    }

    const auto& source_type = schema_field->get().type();

    if (!source_type->is_primitive()) {
      return ValidationError("Cannot sort by non-primitive source field: {}",
                             *source_type);
    }

    // Checked last so that the pre-existing errors keep their precedence. The
    // message uses only the source ID because formatting the whole field may
    // itself touch the missing transform.
    if (field.transform() == nullptr) {
      return ValidationError("Sort field with source ID {} has no transform",
                             field.source_id());
    }

    // Only success or failure matters here; the result type itself is unused.
    ICEBERG_RETURN_UNEXPECTED(field.transform()->ResultType(source_type));
  }
  return {};
}

} // namespace iceberg
86 changes: 86 additions & 0 deletions src/iceberg/sort_order.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@
#pragma once

#include <cstdint>
#include <memory>
#include <span>
#include <vector>

#include "iceberg/expression/expressions.h"
#include "iceberg/expression/term.h"
#include "iceberg/iceberg_export.h"
#include "iceberg/sort_field.h"
#include "iceberg/type_fwd.h"
#include "iceberg/util/formattable.h"

namespace iceberg {
Expand All @@ -36,6 +40,7 @@ namespace iceberg {
/// applied to the data.
class ICEBERG_EXPORT SortOrder : public util::Formattable {
public:
static constexpr int32_t kUnsortedOrderId = 0;
static constexpr int32_t kInitialSortOrderId = 1;

SortOrder(int32_t order_id, std::vector<SortField> fields);
Expand Down Expand Up @@ -77,4 +82,85 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
std::vector<SortField> fields_;
};

/// \brief A builder used to create valid SortOrder instances.
///
/// Fields are collected through the Asc/Desc/SortBy/AddSortField methods and
/// turned into a SortOrder by Build(), which validates them against the schema
/// the builder was created with. The builder is move-only.
class ICEBERG_EXPORT SortOrderBuilder {
public:
/// \brief Create a builder for a new SortOrder
///
/// \param schema Schema used to bind terms and to validate the result. The
/// builder keeps this raw pointer rather than a copy, so the schema has to stay
/// alive for as long as the builder is used.
/// \return A new SortOrderBuilder instance initialized with Schema
static std::unique_ptr<SortOrderBuilder> BuildFromSchema(const Schema* schema);

/// \brief Add an expression term to the sort, ascending with the given null order.
///
/// The term is bound to the schema immediately; a term that is neither a named
/// reference nor an unbound transform results in an IcebergError being thrown.
SortOrderBuilder& Asc(const std::shared_ptr<Term>& term, NullOrder null_order) {
return AddSortField(term, SortDirection::kAscending, null_order);
}

/// \brief Add an expression term to the sort, descending with the given null order.
///
/// The term is bound to the schema immediately; a term that is neither a named
/// reference nor an unbound transform results in an IcebergError being thrown.
SortOrderBuilder& Desc(const std::shared_ptr<Term>& term, NullOrder null_order) {
return AddSortField(term, SortDirection::kDescending, null_order);
}

/// \brief Add a sort field for the column with the given name.
///
/// The name is wrapped with Expressions::Ref() and handled like any other term.
SortOrderBuilder& SortBy(std::string name, SortDirection direction,
NullOrder null_order) {
return AddSortField(Expressions::Ref(std::move(name)), direction, null_order);
}

/// \brief Add a sort field for the given term with an explicit direction.
///
/// Same term requirements as Asc()/Desc().
SortOrderBuilder& SortBy(const std::shared_ptr<Term>& term, SortDirection direction,
NullOrder null_order) {
return AddSortField(term, direction, null_order);
}

/// \brief Set the ID of the sort order being built.
///
/// ID 0 (SortOrder::kUnsortedOrderId) is reserved for the unsorted order:
/// Build() rejects it when fields were added, and rejects any other ID when no
/// fields were added. When no ID is set, an order with fields gets
/// SortOrder::kInitialSortOrderId.
SortOrderBuilder& WithOrderId(int32_t sort_id);

/// \brief Set whether terms are bound to the schema case-sensitively.
///
/// The flag starts out as false and is read at the moment a term is added, so
/// it only affects terms added after this call.
SortOrderBuilder& CaseSensitive(bool case_sensitive);

/// \brief Add a sort field to the sort order with the specified source field ID,
/// transform, direction, and null order.
///
/// The arguments are stored without validation; they are checked in Build().
///
/// \param source_id The source field ID.
/// \param transform The transform to apply to the field.
/// \param direction The sort direction.
/// \param null_order The null ordering behavior (e.g., nulls first or nulls last).
SortOrderBuilder& AddSortField(int32_t source_id,
const std::shared_ptr<Transform>& transform,
SortDirection direction, NullOrder null_order);

/// \brief Builds a SortOrder instance.
///
/// Validates the order ID and then checks each field against the schema: the
/// source field must exist, must have a primitive type, and the transform must
/// yield a result type for it. The collected fields are moved into the result.
///
/// \return A Result containing the constructed SortOrder or an error
Result<std::unique_ptr<SortOrder>> Build();

/// \brief Destructor (defined in the .cc file, where Impl is a complete type)
~SortOrderBuilder();

// Delete copy operations (use BuildFromSchema to create a new builder)
SortOrderBuilder(const SortOrderBuilder&) = delete;
SortOrderBuilder& operator=(const SortOrderBuilder&) = delete;

// Enable move operations
SortOrderBuilder(SortOrderBuilder&&) noexcept;
SortOrderBuilder& operator=(SortOrderBuilder&&) noexcept;

private:
/// \brief Private constructor for building from Schema
explicit SortOrderBuilder(const Schema* schema);

/// \brief Bind a term to the schema and append the resulting sort field.
///
/// Shared implementation behind Asc(), Desc() and both SortBy() overloads.
SortOrderBuilder& AddSortField(const std::shared_ptr<Term>& term,
SortDirection direction, NullOrder null_order);

/// \brief Builds an unchecked SortOrder instance.
///
/// Enforces only the order-ID rules; schema compatibility is checked separately.
Result<std::unique_ptr<SortOrder>> BuildUncheckd();

/// \brief Check that every field of the sort order is valid for the schema.
static Status CheckCompatibility(const std::unique_ptr<SortOrder>& sort_order,
const Schema* schema);

/// Internal state members (defined in the .cc file)
struct Impl;
std::unique_ptr<Impl> impl_;
};

} // namespace iceberg
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ add_iceberg_test(schema_test
partition_field_test.cc
partition_spec_test.cc
sort_field_test.cc
sort_order_builder_test.cc
sort_order_test.cc
snapshot_test.cc
schema_util_test.cc)
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ iceberg_tests = {
'schema_util_test.cc',
'snapshot_test.cc',
'sort_field_test.cc',
'sort_order_builder_test.cc',
'sort_order_test.cc',
'transform_test.cc',
'type_test.cc',
Expand Down
2 changes: 1 addition & 1 deletion src/iceberg/test/schema_field_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ TEST(SchemaFieldTest, Equality) {
iceberg::SchemaField field1(1, "foo", iceberg::int32(), false);
iceberg::SchemaField field2(2, "foo", iceberg::int32(), false);
iceberg::SchemaField field3(1, "bar", iceberg::int32(), false);
iceberg::SchemaField field4(1, "foo", std::make_shared<iceberg::LongType>(), false);
iceberg::SchemaField field4(1, "foo", iceberg::int64(), false);
iceberg::SchemaField field5(1, "foo", iceberg::int32(), true);
iceberg::SchemaField field6(1, "foo", iceberg::int32(), false);

Expand Down
1 change: 0 additions & 1 deletion src/iceberg/test/schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "gtest/gtest.h"
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
Expand Down
Loading
Loading