Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
set(ICEBERG_SOURCES
arrow_c_data_internal.cc
demo.cc
json_internal.cc
schema.cc
schema_field.cc
schema_internal.cc
Expand Down
112 changes: 112 additions & 0 deletions src/iceberg/json_internal.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/json_internal.h"

#include <format>

#include <nlohmann/json.hpp>

#include "iceberg/sort_order.h"
#include "iceberg/transform.h"
#include "iceberg/util/formatter.h"

namespace iceberg {

namespace {

constexpr std::string_view kTransform = "transform";
constexpr std::string_view kSourceId = "source-id";
constexpr std::string_view kDirection = "direction";
constexpr std::string_view kNullOrder = "null-order";

constexpr std::string_view kOrderId = "order-id";
constexpr std::string_view kFields = "fields";

/// \brief Extract the value stored under `key` in `json`, converted to `T`.
///
/// \tparam T The C++ type the JSON value is converted to via `get<T>()`.
/// \param json The JSON value to look the key up in.
/// \param key The key to read.
/// \return The converted value, or an `ErrorKind::kInvalidArgument` error when the key
/// is absent or the conversion throws.
template <typename T>
expected<T, Error> GetJsonValue(const nlohmann::json& json, std::string_view key) {
  if (!json.contains(key)) {
    return unexpected<Error>({.kind = ErrorKind::kInvalidArgument,
                              .message = "Missing key: " + std::string(key)});
  }
  try {
    return json.at(key).get<T>();
  } catch (const std::exception& ex) {
    // Copy the key through std::string(key) so its explicit length is honoured:
    // std::string_view::data() is not guaranteed to be NUL-terminated, so treating it
    // as a C string could read past the end of the view.
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = "Failed to parse key: " + std::string(key) + ", " + ex.what()});
  }
}

#define TRY_ASSIGN(json_value, expr) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll probably want to make a broader version of this macro. (Not necessarily in this PR)

FWIW, you could steal the Arrow version, which uses a counter to autogenerate the name, so it can be used like ARROW_ASSIGN_OR_RAISE(auto value, ...) or ARROW_ASSIGN_OR_RAISE(foo.bar, ...) too

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

auto _tmp_##json_value = (expr); \
if (!_tmp_##json_value) return unexpected(_tmp_##json_value.error()); \
auto json_value = std::move(_tmp_##json_value.value());
} // namespace

nlohmann::json ToJson(const SortField& sort_field) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about adding ToJson and FromJson member functions to each Spec classes, but your way might be better, users don't need to touch the json ser/der functions, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there is no need to introduce json either.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in this way we don't need to expose our internal json dependency.

nlohmann::json json;
json[kTransform] = std::format("{}", *sort_field.transform());
json[kSourceId] = sort_field.source_id();
json[kDirection] = SortDirectionToString(sort_field.direction());
json[kNullOrder] = NullOrderToString(sort_field.null_order());
return json;
}

/// Serialize a `SortOrder`: the order id goes under "order-id" and every sort field is
/// serialized with `ToJson(const SortField&)` into the "fields" array.
nlohmann::json ToJson(const SortOrder& sort_order) {
  nlohmann::json result;
  result[kOrderId] = sort_order.order_id();

  // Start from an explicit array so an order without fields serializes as `[]`.
  auto serialized_fields = nlohmann::json::array();
  for (const auto& sort_field : sort_order.fields()) {
    serialized_fields.push_back(ToJson(sort_field));
  }
  result[kFields] = serialized_fields;

  return result;
}

/// Build a `SortField` from its JSON form.
///
/// Reads "transform", "source-id", "direction" and "null-order" in that order; the first
/// key that is missing, has the wrong type, or names an unknown value short-circuits
/// with that step's error.
expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(
    const nlohmann::json& json) {
  // Transform: raw name first, then the function object it denotes.
  TRY_ASSIGN(transform_name, GetJsonValue<std::string>(json, kTransform));
  TRY_ASSIGN(transform_fn, TransformFunctionFromString(transform_name));

  // Id of the source column.
  TRY_ASSIGN(field_source_id, GetJsonValue<int32_t>(json, kSourceId));

  // Sort direction.
  TRY_ASSIGN(direction_name, GetJsonValue<std::string>(json, kDirection));
  TRY_ASSIGN(sort_direction, SortDirectionFromString(direction_name));

  // Null ordering.
  TRY_ASSIGN(null_order_name, GetJsonValue<std::string>(json, kNullOrder));
  TRY_ASSIGN(nulls, NullOrderFromString(null_order_name));

  return std::make_unique<SortField>(field_source_id, std::move(transform_fn),
                                     sort_direction, nulls);
}

/// Build a `SortOrder` from its JSON form.
///
/// Reads the order id from "order-id" and parses every element of the "fields" array
/// with `SortFieldFromJson`.
///
/// \param json The JSON value describing the sort order.
/// \return The parsed `SortOrder`, or an `ErrorKind::kInvalidArgument` error when
/// "order-id" is missing or malformed, "fields" is missing or not an array, or any
/// element of "fields" fails to parse.
expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(
    const nlohmann::json& json) {
  TRY_ASSIGN(order_id, GetJsonValue<int32_t>(json, kOrderId));

  // Validate "fields" up front: json.at() throws when the key is absent, and this
  // function reports failures through its return value rather than exceptions.
  if (!json.contains(kFields) || !json.at(kFields).is_array()) {
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = "Missing or non-array key: " + std::string(kFields)});
  }
  const auto& fields_json = json.at(kFields);

  std::vector<SortField> sort_fields;
  sort_fields.reserve(fields_json.size());
  for (const auto& field_json : fields_json) {
    TRY_ASSIGN(sort_field, SortFieldFromJson(field_json));
    // The parsed field is owned by a temporary unique_ptr; move it out instead of
    // copying it.
    sort_fields.push_back(std::move(*sort_field));
  }

  return std::make_unique<SortOrder>(order_id, std::move(sort_fields));
}

} // namespace iceberg
73 changes: 73 additions & 0 deletions src/iceberg/json_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <memory>

#include <nlohmann/json_fwd.hpp>

#include "iceberg/error.h"
#include "iceberg/expected.h"
#include "iceberg/type_fwd.h"

namespace iceberg {
/// \brief Serializes a `SortField` object to JSON.
///
/// The resulting JSON object has four entries: "transform" (the formatted transform),
/// "source-id", "direction" (the sort direction name) and "null-order" (the null
/// ordering name).
///
/// \param sort_field The `SortField` object to be serialized.
/// \return A JSON object representing the `SortField` as key-value pairs.
nlohmann::json ToJson(const SortField& sort_field);

/// \brief Serializes a `SortOrder` object to JSON.
///
/// The resulting JSON object has two entries: "order-id" and "fields". "fields" is a
/// JSON array holding one entry per sort field, each serialized as described for
/// `ToJson(const SortField&)`.
///
/// \param sort_order The `SortOrder` object to be serialized.
/// \return A JSON object representing the `SortOrder` with its order ID and fields array.
nlohmann::json ToJson(const SortOrder& sort_order);

/// \brief Deserializes a JSON object into a `SortField` object.
///
/// The JSON object is expected to contain the keys "transform", "source-id",
/// "direction" and "null-order".
///
/// \param json The JSON object representing a `SortField`.
/// \return An `expected` value holding either a `std::unique_ptr` to the parsed
/// `SortField` or an error. An error is returned if one of the expected keys is missing
/// or cannot be parsed.
expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(const nlohmann::json& json);

/// \brief Deserializes a JSON object into a `SortOrder` object.
///
/// The JSON object is expected to contain "order-id" and "fields", where "fields" is a
/// list of sort fields. Each element is parsed using the `SortFieldFromJson` function.
///
/// \param json The JSON object representing a `SortOrder`.
/// \return An `expected` value holding either a `std::unique_ptr` to the parsed
/// `SortOrder` or an error. An error is returned if "order-id" is missing or cannot be
/// parsed, or if any sort field fails to parse.
expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(const nlohmann::json& json);

} // namespace iceberg
28 changes: 1 addition & 27 deletions src/iceberg/sort_field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,32 +27,6 @@

namespace iceberg {

namespace {
/// \brief Get the relative sort direction name
constexpr std::string_view ToString(SortDirection direction) {
switch (direction) {
case SortDirection::kAscending:
return "asc";
case SortDirection::kDescending:
return "desc";
default:
return "invalid";
}
}

/// \brief Get the relative null order name
constexpr std::string_view ToString(NullOrder null_order) {
switch (null_order) {
case NullOrder::kFirst:
return "nulls-first";
case NullOrder::kLast:
return "nulls-last";
default:
return "invalid";
}
}
} // namespace

SortField::SortField(int32_t source_id, std::shared_ptr<TransformFunction> transform,
SortDirection direction, NullOrder null_order)
: source_id_(source_id),
Expand All @@ -73,7 +47,7 @@ NullOrder SortField::null_order() const { return null_order_; }
std::string SortField::ToString() const {
return std::format(
"sort_field(source_id={}, transform={}, direction={}, null_order={})", source_id_,
*transform_, iceberg::ToString(direction_), iceberg::ToString(null_order_));
*transform_, SortDirectionToString(direction_), NullOrderToString(null_order_));
}

bool SortField::Equals(const SortField& other) const {
Expand Down
41 changes: 41 additions & 0 deletions src/iceberg/sort_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <string_view>
#include <vector>

#include "iceberg/error.h"
#include "iceberg/expected.h"
#include "iceberg/iceberg_export.h"
#include "iceberg/type_fwd.h"
#include "iceberg/util/formattable.h"
Expand All @@ -41,13 +43,52 @@ enum class SortDirection {
/// Descending
kDescending,
};
/// \brief Get the string name corresponding to a sort direction
ICEBERG_EXPORT constexpr std::string_view SortDirectionToString(SortDirection direction) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we should probably only declare these functions here and put all implementation in the .cc file.

switch (direction) {
case SortDirection::kAscending:
return "asc";
case SortDirection::kDescending:
return "desc";
default:
return "invalid";
}
}
/// \brief Parse a sort direction from its string name.
///
/// \param str The name to parse; "asc" and "desc" are the recognized names.
/// \return The matching `SortDirection`, or an `ErrorKind::kInvalidArgument` error for
/// any other input.
ICEBERG_EXPORT constexpr expected<SortDirection, Error> SortDirectionFromString(
    std::string_view str) {
  if (str == "asc") {
    return SortDirection::kAscending;
  } else if (str == "desc") {
    return SortDirection::kDescending;
  } else {
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = "Invalid SortDirection string: " + std::string(str)});
  }
}

/// \brief Where null values are placed relative to non-null values when sorting.
enum class NullOrder {
/// Nulls are sorted first
kFirst,
/// Nulls are sorted last
kLast,
};
/// \brief Get the string name corresponding to a null order.
///
/// \param null_order The null order to name.
/// \return "nulls-first" or "nulls-last" for the known enumerators, "invalid" for any
/// other value.
ICEBERG_EXPORT constexpr std::string_view NullOrderToString(NullOrder null_order) {
  if (null_order == NullOrder::kFirst) {
    return "nulls-first";
  }
  if (null_order == NullOrder::kLast) {
    return "nulls-last";
  }
  // Any value outside the declared enumerators.
  return "invalid";
}
/// \brief Parse a null order from its string name.
///
/// \param str The name to parse; "nulls-first" and "nulls-last" are the recognized names.
/// \return The matching `NullOrder`, or an `ErrorKind::kInvalidArgument` error for any
/// other input.
ICEBERG_EXPORT constexpr expected<NullOrder, Error> NullOrderFromString(
    std::string_view str) {
  if (str == "nulls-first") {
    return NullOrder::kFirst;
  } else if (str == "nulls-last") {
    return NullOrder::kLast;
  } else {
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = "Invalid NullOrder string: " + std::string(str)});
  }
}

/// \brief a field with its transform.
class ICEBERG_EXPORT SortField : public util::Formattable {
Expand Down
10 changes: 10 additions & 0 deletions src/iceberg/transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,14 @@ expected<ArrowArray, Error> IdentityTransformFunction::Transform(
.message = "IdentityTransformFunction::Transform"});
}

expected<std::unique_ptr<TransformFunction>, Error> TransformFunctionFromString(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related, but should we do something like define iceberg::Result<T> = expected<T, iceberg::Error>?

std::string_view str) {
if (str == "identity") {
return std::make_unique<IdentityTransformFunction>();
}
return unexpected<Error>(
{.kind = ErrorKind::kInvalidArgument,
.message = "Invalid TransformFunction string: " + std::string(str)});
}

} // namespace iceberg
5 changes: 4 additions & 1 deletion src/iceberg/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ class ICEBERG_EXPORT TransformFunction : public util::Formattable {
TransformType transform_type_;
};

class IdentityTransformFunction : public TransformFunction {
ICEBERG_EXPORT expected<std::unique_ptr<TransformFunction>, Error>
TransformFunctionFromString(std::string_view str);

class ICEBERG_EXPORT IdentityTransformFunction : public TransformFunction {
public:
IdentityTransformFunction();
/// \brief Transform will take an input array and transform it into a new array.
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/type_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class Transaction;
class HistoryEntry;
class PartitionSpec;
class Snapshot;
class SortField;
class SortOrder;
class StructLike;
class TableMetadata;
Expand Down
3 changes: 2 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ fetchcontent_makeavailable(googletest)

add_executable(schema_test)
target_sources(schema_test
PRIVATE schema_test.cc
PRIVATE json_internal_test.cc
schema_test.cc
schema_field_test.cc
type_test.cc
transform_test.cc
Expand Down
Loading
Loading