-
Notifications
You must be signed in to change notification settings - Fork 70
feat: sort field/order json serialize/deserialization #64
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| #include "iceberg/json_internal.h" | ||
|
|
||
| #include <format> | ||
|
|
||
| #include <nlohmann/json.hpp> | ||
|
|
||
| #include "iceberg/sort_order.h" | ||
| #include "iceberg/transform.h" | ||
| #include "iceberg/util/formatter.h" | ||
|
|
||
| namespace iceberg { | ||
|
|
||
| namespace { | ||
|
|
||
// JSON member names of a serialized SortField.
constexpr std::string_view kTransform{"transform"};
constexpr std::string_view kSourceId{"source-id"};
constexpr std::string_view kDirection{"direction"};
constexpr std::string_view kNullOrder{"null-order"};

// JSON member names of a serialized SortOrder.
constexpr std::string_view kOrderId{"order-id"};
constexpr std::string_view kFields{"fields"};
|
|
||
| // --- helper for safe JSON extraction --- | ||
| template <typename T> | ||
| expected<T, Error> GetJsonValue(const nlohmann::json& json, std::string_view key) { | ||
| if (!json.contains(key)) { | ||
| return unexpected<Error>({.kind = ErrorKind::kInvalidArgument, | ||
| .message = "Missing key: " + std::string(key)}); | ||
| } | ||
| try { | ||
| return json.at(key).get<T>(); | ||
| } catch (const std::exception& ex) { | ||
| return unexpected<Error>({.kind = ErrorKind::kInvalidArgument, | ||
| .message = std::string("Failed to parse key: ") + | ||
| key.data() + ", " + ex.what()}); | ||
| } | ||
| } | ||
|
|
||
// Evaluates `expr`, which must yield an `expected`-like result. If it holds an error,
// the enclosing function returns that error (moved, not copied); otherwise the value is
// moved into a new local variable named `json_value`.
//
// NOTE: the expansion consists of several statements and declares two variables
// (`_tmp_<json_value>` and `<json_value>`) in the current scope. Use it only as a
// standalone statement inside a braced block, and use each `json_value` name at most
// once per scope.
#define TRY_ASSIGN(json_value, expr)                     \
  auto _tmp_##json_value = (expr);                       \
  if (!_tmp_##json_value) {                              \
    return unexpected(std::move(_tmp_##json_value.error())); \
  }                                                      \
  auto json_value = std::move(_tmp_##json_value.value());
| } // namespace | ||
|
|
||
| nlohmann::json ToJson(const SortField& sort_field) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was thinking about adding `ToJson` and `FromJson` member functions to each spec class, but your way might be better: users don't need to touch the JSON serialization/deserialization functions, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, and there is no need to introduce JSON there either.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this way we don't need to expose our internal JSON dependency. |
||
| nlohmann::json json; | ||
| json[kTransform] = std::format("{}", *sort_field.transform()); | ||
| json[kSourceId] = sort_field.source_id(); | ||
| json[kDirection] = SortDirectionToString(sort_field.direction()); | ||
| json[kNullOrder] = NullOrderToString(sort_field.null_order()); | ||
| return json; | ||
| } | ||
|
|
||
| nlohmann::json ToJson(const SortOrder& sort_order) { | ||
| nlohmann::json json; | ||
| json[kOrderId] = sort_order.order_id(); | ||
|
|
||
| nlohmann::json fields_json = nlohmann::json::array(); | ||
| for (const auto& field : sort_order.fields()) { | ||
| fields_json.push_back(ToJson(field)); | ||
| } | ||
| json[kFields] = fields_json; | ||
| return json; | ||
| } | ||
|
|
||
| expected<std::unique_ptr<SortField>, Error> SortFieldFromJson( | ||
| const nlohmann::json& json) { | ||
| TRY_ASSIGN(transform_str, GetJsonValue<std::string>(json, kTransform)); | ||
| TRY_ASSIGN(transform, TransformFunctionFromString(transform_str)); | ||
| TRY_ASSIGN(source_id, GetJsonValue<int32_t>(json, kSourceId)); | ||
| TRY_ASSIGN(direction_str, GetJsonValue<std::string>(json, kDirection)); | ||
| TRY_ASSIGN(direction, SortDirectionFromString(direction_str)); | ||
| TRY_ASSIGN(null_order_str, GetJsonValue<std::string>(json, kNullOrder)); | ||
| TRY_ASSIGN(null_order, NullOrderFromString(null_order_str)); | ||
|
|
||
| return std::make_unique<SortField>(source_id, std::move(transform), direction, | ||
| null_order); | ||
| } | ||
|
|
||
| expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson( | ||
| const nlohmann::json& json) { | ||
| TRY_ASSIGN(order_id, GetJsonValue<int32_t>(json, kOrderId)); | ||
|
|
||
| std::vector<SortField> sort_fields; | ||
| for (const auto& field_json : json.at(kFields)) { | ||
| TRY_ASSIGN(sort_field, SortFieldFromJson(field_json)); | ||
| sort_fields.push_back(*sort_field); | ||
| } | ||
|
|
||
| return std::make_unique<SortOrder>(order_id, std::move(sort_fields)); | ||
| } | ||
|
|
||
| } // namespace iceberg | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <memory> | ||
|
|
||
| #include <nlohmann/json_fwd.hpp> | ||
|
|
||
| #include "iceberg/error.h" | ||
| #include "iceberg/expected.h" | ||
| #include "iceberg/type_fwd.h" | ||
|
|
||
| namespace iceberg { | ||
| /// \brief Serializes a `SortField` object to JSON. | ||
| /// | ||
| /// This function converts a `SortField` object into a JSON representation. | ||
| /// The resulting JSON object includes the transform type, source ID, sort direction, and | ||
| /// null ordering. | ||
| /// | ||
| /// \param sort_field The `SortField` object to be serialized. | ||
| /// \return A JSON object representing the `SortField` in the form of key-value pairs. | ||
| nlohmann::json ToJson(const SortField& sort_field); | ||
|
|
||
| /// \brief Serializes a `SortOrder` object to JSON. | ||
| /// | ||
| /// This function converts a `SortOrder` object into a JSON representation. | ||
| /// The resulting JSON includes the order ID and a list of `SortField` objects. | ||
| /// Each `SortField` is serialized as described in the `ToJson(SortField)` function. | ||
| /// | ||
| /// \param sort_order The `SortOrder` object to be serialized. | ||
| /// \return A JSON object representing the `SortOrder` with its order ID and fields array. | ||
| nlohmann::json ToJson(const SortOrder& sort_order); | ||
|
|
||
| /// \brief Deserializes a JSON object into a `SortField` object. | ||
| /// | ||
| /// This function parses the provided JSON and creates a `SortField` object. | ||
| /// It expects the JSON object to contain keys for the transform, source ID, direction, | ||
| /// and null order. | ||
| /// | ||
| /// \param json The JSON object representing a `SortField`. | ||
| /// \return An `expected` value containing either a `std::unique_ptr` to the parsed | ||
| /// `SortField` or an error. If the JSON is malformed or missing expected fields, an | ||
| /// error will be returned. | ||
| expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(const nlohmann::json& json); | ||
|
|
||
| /// \brief Deserializes a JSON object into a `SortOrder` object. | ||
| /// | ||
| /// This function parses the provided JSON and creates a `SortOrder` object. | ||
| /// It expects the JSON object to contain the order ID and a list of `SortField` objects. | ||
| /// Each `SortField` will be parsed using the `SortFieldFromJson` function. | ||
| /// | ||
| /// \param json The JSON object representing a `SortOrder`. | ||
| /// \return An `expected` value containing either a `std::unique_ptr` to the parsed | ||
| /// `SortOrder` or an error. If the JSON is malformed or missing expected fields, an | ||
| /// error will be returned. | ||
| expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(const nlohmann::json& json); | ||
|
|
||
| } // namespace iceberg |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,8 @@ | |
| #include <string_view> | ||
| #include <vector> | ||
|
|
||
| #include "iceberg/error.h" | ||
| #include "iceberg/expected.h" | ||
| #include "iceberg/iceberg_export.h" | ||
| #include "iceberg/type_fwd.h" | ||
| #include "iceberg/util/formattable.h" | ||
|
|
@@ -41,13 +43,52 @@ enum class SortDirection { | |
| /// Descending | ||
| kDescending, | ||
| }; | ||
| /// \brief Get the relative sort direction name | ||
| ICEBERG_EXPORT constexpr std::string_view SortDirectionToString(SortDirection direction) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: we should probably only declare these functions here and put all of the implementations in the .cc file. |
||
| switch (direction) { | ||
| case SortDirection::kAscending: | ||
| return "asc"; | ||
| case SortDirection::kDescending: | ||
| return "desc"; | ||
| default: | ||
| return "invalid"; | ||
| } | ||
| } | ||
| /// \brief Get the relative sort direction from name | ||
| ICEBERG_EXPORT constexpr expected<SortDirection, Error> SortDirectionFromString( | ||
| std::string_view str) { | ||
| if (str == "asc") return SortDirection::kAscending; | ||
| if (str == "desc") return SortDirection::kDescending; | ||
| return unexpected<Error>( | ||
| {.kind = ErrorKind::kInvalidArgument, | ||
| .message = "Invalid SortDirection string: " + std::string(str)}); | ||
| } | ||
|
|
||
/// \brief Where null values are placed when sorting.
enum class NullOrder {
  kFirst,  ///< Nulls are sorted first
  kLast,   ///< Nulls are sorted last
};
| /// \brief Get the relative null order name | ||
| ICEBERG_EXPORT constexpr std::string_view NullOrderToString(NullOrder null_order) { | ||
| switch (null_order) { | ||
| case NullOrder::kFirst: | ||
| return "nulls-first"; | ||
| case NullOrder::kLast: | ||
| return "nulls-last"; | ||
| default: | ||
| return "invalid"; | ||
| } | ||
| } | ||
| /// \brief Get the relative null order from name | ||
| ICEBERG_EXPORT constexpr expected<NullOrder, Error> NullOrderFromString( | ||
| std::string_view str) { | ||
| if (str == "nulls-first") return NullOrder::kFirst; | ||
| if (str == "nulls-last") return NullOrder::kLast; | ||
| return unexpected<Error>({.kind = ErrorKind::kInvalidArgument, | ||
| .message = "Invalid NullOrder string: " + std::string(str)}); | ||
| } | ||
|
|
||
| /// \brief a field with its transform. | ||
| class ICEBERG_EXPORT SortField : public util::Formattable { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,4 +72,14 @@ expected<ArrowArray, Error> IdentityTransformFunction::Transform( | |
| .message = "IdentityTransformFunction::Transform"}); | ||
| } | ||
|
|
||
| expected<std::unique_ptr<TransformFunction>, Error> TransformFunctionFromString( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not related, but should we do something like define |
||
| std::string_view str) { | ||
| if (str == "identity") { | ||
| return std::make_unique<IdentityTransformFunction>(); | ||
| } | ||
| return unexpected<Error>( | ||
| {.kind = ErrorKind::kInvalidArgument, | ||
| .message = "Invalid TransformFunction string: " + std::string(str)}); | ||
| } | ||
|
|
||
| } // namespace iceberg | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'll probably want to make a broader version of this macro. (Not necessarily in this PR)
FWIW, you could steal the Arrow version, which uses a counter to autogenerate the name, so it can be used like
`ARROW_ASSIGN_OR_RAISE(auto value, ...)` or `ARROW_ASSIGN_OR_RAISE(foo.bar, ...)` too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did this exactly: https://github.com/apache/iceberg-cpp/pull/65/files#diff-d8a08fcbce5230d86b8b253508bd5d8dc6216a2d0f5049b58b6e7792f2243040