From 3af3afb21ff1cce53c960740114cb0a60a6eaba9 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 10 Apr 2025 17:50:16 +0800 Subject: [PATCH] refactor: consolidate json utility --- src/iceberg/json_internal.cc | 309 ++++++++++++++++++++++++++++++--- src/iceberg/json_internal.h | 38 ++++ src/iceberg/schema_internal.cc | 286 +----------------------------- src/iceberg/schema_internal.h | 40 +---- test/schema_json_test.cc | 3 +- 5 files changed, 332 insertions(+), 344 deletions(-) diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index b953d1d46..6847b3ac9 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -20,12 +20,19 @@ #include "iceberg/json_internal.h" #include +#include #include +#include "iceberg/error.h" +#include "iceberg/expected.h" +#include "iceberg/schema.h" +#include "iceberg/schema_internal.h" #include "iceberg/sort_order.h" #include "iceberg/transform.h" +#include "iceberg/type.h" #include "iceberg/util/formatter.h" +#include "iceberg/util/macros.h" namespace iceberg { @@ -39,26 +46,46 @@ constexpr std::string_view kNullOrder = "null-order"; constexpr std::string_view kOrderId = "order-id"; constexpr std::string_view kFields = "fields"; -// --- helper for safe JSON extraction --- +constexpr std::string_view kSchemaId = "schema-id"; +constexpr std::string_view kIdentifierFieldIds = "identifier-field-ids"; + +constexpr std::string_view kType = "type"; +constexpr std::string_view kStruct = "struct"; +constexpr std::string_view kList = "list"; +constexpr std::string_view kMap = "map"; +constexpr std::string_view kElement = "element"; +constexpr std::string_view kKey = "key"; +constexpr std::string_view kValue = "value"; +constexpr std::string_view kDoc = "doc"; +constexpr std::string_view kName = "name"; +constexpr std::string_view kId = "id"; +constexpr std::string_view kInitialDefault = "initial-default"; +constexpr std::string_view kWriteDefault = "write-default"; +constexpr std::string_view kElementId = "element-id"; +constexpr std::string_view kKeyId = "key-id"; +constexpr std::string_view kValueId = "value-id"; +constexpr std::string_view kRequired = "required"; +constexpr std::string_view kElementRequired = "element-required"; +constexpr std::string_view kValueRequired = "value-required"; + template expected GetJsonValue(const nlohmann::json& json, std::string_view key) { if (!json.contains(key)) { - return unexpected({.kind = ErrorKind::kInvalidArgument, - .message = "Missing key: " + std::string(key)}); + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Missing '{}' in {}", key, json.dump()), + }); } try { return json.at(key).get(); } catch (const std::exception& ex) { - return unexpected({.kind = ErrorKind::kInvalidArgument, - .message = std::string("Failed to parse key: ") + - key.data() + ", " + ex.what()}); + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Failed to parse key '{}' in {}", key, json.dump()), + }); } } -#define TRY_ASSIGN(json_value, expr) \ - auto _tmp_##json_value = (expr); \ - if (!_tmp_##json_value) return unexpected(_tmp_##json_value.error()); \ - auto json_value = std::move(_tmp_##json_value.value()); } // namespace nlohmann::json ToJson(const SortField& sort_field) { @@ -84,29 +111,265 @@ nlohmann::json ToJson(const SortOrder& sort_order) { expected, Error> SortFieldFromJson( const nlohmann::json& json) { - TRY_ASSIGN(transform_str, GetJsonValue(json, kTransform)); - TRY_ASSIGN(transform, TransformFunctionFromString(transform_str)); - TRY_ASSIGN(source_id, GetJsonValue(json, kSourceId)); - TRY_ASSIGN(direction_str, GetJsonValue(json, kDirection)); - TRY_ASSIGN(direction, SortDirectionFromString(direction_str)); - TRY_ASSIGN(null_order_str, GetJsonValue(json, kNullOrder)); - TRY_ASSIGN(null_order, NullOrderFromString(null_order_str)); - + ICEBERG_ASSIGN_OR_RAISE(auto source_id, GetJsonValue(json, kSourceId)); + ICEBERG_ASSIGN_OR_RAISE( + auto transform, + GetJsonValue(json, kTransform).and_then(TransformFunctionFromString)); + ICEBERG_ASSIGN_OR_RAISE( + auto direction, + GetJsonValue(json, kDirection).and_then(SortDirectionFromString)); + ICEBERG_ASSIGN_OR_RAISE( + auto null_order, + GetJsonValue(json, kNullOrder).and_then(NullOrderFromString)); return std::make_unique(source_id, std::move(transform), direction, null_order); } expected, Error> SortOrderFromJson( const nlohmann::json& json) { - TRY_ASSIGN(order_id, GetJsonValue(json, kOrderId)); + ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue(json, kOrderId)); + ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue(json, kFields)); std::vector sort_fields; - for (const auto& field_json : json.at(kFields)) { - TRY_ASSIGN(sort_field, SortFieldFromJson(field_json)); - sort_fields.push_back(*sort_field); + for (const auto& field_json : fields) { + ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json)); + sort_fields.push_back(std::move(*sort_field)); } - return std::make_unique(order_id, std::move(sort_fields)); } +nlohmann::json FieldToJson(const SchemaField& field) { + nlohmann::json json; + json[kId] = field.field_id(); + json[kName] = field.name(); + json[kRequired] = !field.optional(); + json[kType] = TypeToJson(*field.type()); + return json; +} + +nlohmann::json TypeToJson(const Type& type) { + switch (type.type_id()) { + case TypeId::kStruct: { + const auto& struct_type = static_cast(type); + nlohmann::json json; + json[kType] = kStruct; + nlohmann::json fields_json = nlohmann::json::array(); + for (const auto& field : struct_type.fields()) { + fields_json.push_back(FieldToJson(field)); + // TODO(gangwu): add default values + } + json[kFields] = fields_json; + return json; + } + case TypeId::kList: { + const auto& list_type = static_cast(type); + nlohmann::json json; + json[kType] = kList; + + const auto& element_field = list_type.fields().front(); + json[kElementId] = element_field.field_id(); + json[kElementRequired] = !element_field.optional(); + json[kElement] = TypeToJson(*element_field.type()); + return json; + } + case TypeId::kMap: { + const auto& map_type = static_cast(type); + nlohmann::json json; + json[std::string(kType)] = kMap; + + const auto& key_field = map_type.key(); + json[kKeyId] = key_field.field_id(); + json[kKey] = TypeToJson(*key_field.type()); + + const auto& value_field = map_type.value(); + json[kValueId] = value_field.field_id(); + json[kValueRequired] = !value_field.optional(); + json[kValue] = TypeToJson(*value_field.type()); + return json; + } + case TypeId::kBoolean: + return "boolean"; + case TypeId::kInt: + return "int"; + case TypeId::kLong: + return "long"; + case TypeId::kFloat: + return "float"; + case TypeId::kDouble: + return "double"; + case TypeId::kDecimal: { + const auto& decimal_type = static_cast(type); + return std::format("decimal({},{})", decimal_type.precision(), + decimal_type.scale()); + } + case TypeId::kDate: + return "date"; + case TypeId::kTime: + return "time"; + case TypeId::kTimestamp: + return "timestamp"; + case TypeId::kTimestampTz: + return "timestamptz"; + case TypeId::kString: + return "string"; + case TypeId::kBinary: + return "binary"; + case TypeId::kFixed: { + const auto& fixed_type = static_cast(type); + return std::format("fixed[{}]", fixed_type.length()); + } + case TypeId::kUuid: + return "uuid"; + } +} + +nlohmann::json SchemaToJson(const Schema& schema) { + nlohmann::json json = TypeToJson(static_cast(schema)); + json[kSchemaId] = schema.schema_id(); + // TODO(gangwu): add identifier-field-ids. + return json; +} + +namespace { + +expected, Error> StructTypeFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto json_fields, GetJsonValue(json, kFields)); + + std::vector fields; + for (const auto& field_json : json_fields) { + ICEBERG_ASSIGN_OR_RAISE(auto field, FieldFromJson(field_json)); + fields.emplace_back(std::move(*field)); + } + + return std::make_unique(std::move(fields)); +} + +expected, Error> ListTypeFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto element_type, TypeFromJson(json[kElement])); + ICEBERG_ASSIGN_OR_RAISE(auto element_id, GetJsonValue(json, kElementId)); + ICEBERG_ASSIGN_OR_RAISE(auto element_required, + GetJsonValue(json, kElementRequired)); + + return std::make_unique( + SchemaField(element_id, std::string(ListType::kElementName), + std::move(element_type), !element_required)); +} + +expected, Error> MapTypeFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE( + auto key_type, GetJsonValue(json, kKey).and_then(TypeFromJson)); + ICEBERG_ASSIGN_OR_RAISE( + auto value_type, GetJsonValue(json, kValue).and_then(TypeFromJson)); + + ICEBERG_ASSIGN_OR_RAISE(auto key_id, GetJsonValue(json, kKeyId)); + ICEBERG_ASSIGN_OR_RAISE(auto value_id, GetJsonValue(json, kValueId)); + ICEBERG_ASSIGN_OR_RAISE(auto value_required, GetJsonValue(json, kValueRequired)); + + SchemaField key_field(key_id, std::string(MapType::kKeyName), std::move(key_type), + /*optional=*/false); + SchemaField value_field(value_id, std::string(MapType::kValueName), + std::move(value_type), !value_required); + return std::make_unique(std::move(key_field), std::move(value_field)); +} + +} // namespace + +expected, Error> TypeFromJson(const nlohmann::json& json) { + if (json.is_string()) { + std::string type_str = json.get(); + if (type_str == "boolean") { + return std::make_unique(); + } else if (type_str == "int") { + return std::make_unique(); + } else if (type_str == "long") { + return std::make_unique(); + } else if (type_str == "float") { + return std::make_unique(); + } else if (type_str == "double") { + return std::make_unique(); + } else if (type_str == "date") { + return std::make_unique(); + } else if (type_str == "time") { + return std::make_unique(); + } else if (type_str == "timestamp") { + return std::make_unique(); + } else if (type_str == "timestamptz") { + return std::make_unique(); + } else if (type_str == "string") { + return std::make_unique(); + } else if (type_str == "binary") { + return std::make_unique(); + } else if (type_str == "uuid") { + return std::make_unique(); + } else if (type_str.starts_with("fixed")) { + std::regex fixed_regex(R"(fixed\[\s*(\d+)\s*\])"); + std::smatch match; + if (std::regex_match(type_str, match, fixed_regex)) { + return std::make_unique(std::stoi(match[1].str())); + } + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Invalid fixed type: {}", type_str), + }); + } else if (type_str.starts_with("decimal")) { + std::regex decimal_regex(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))"); + std::smatch match; + if (std::regex_match(type_str, match, decimal_regex)) { + return std::make_unique(std::stoi(match[1].str()), + std::stoi(match[2].str())); + } + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Invalid decimal type: {}", type_str), + }); + } else { + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Unknown primitive type: {}", type_str), + }); + } + } + + // For complex types like struct, list, and map + ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue(json, kType)); + if (type_str == kStruct) { + return StructTypeFromJson(json); + } else if (type_str == kList) { + return ListTypeFromJson(json); + } else if (type_str == kMap) { + return MapTypeFromJson(json); + } else { + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Unknown complex type: {}", type_str), + }); + } +} + +expected, Error> FieldFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE( + auto type, GetJsonValue(json, kType).and_then(TypeFromJson)); + ICEBERG_ASSIGN_OR_RAISE(auto field_id, GetJsonValue(json, kId)); + ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue(json, kName)); + ICEBERG_ASSIGN_OR_RAISE(auto required, GetJsonValue(json, kRequired)); + + return std::make_unique(field_id, std::move(name), std::move(type), + !required); +} + +expected, Error> SchemaFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValue(json, kSchemaId)); + ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json)); + + if (type->type_id() != TypeId::kStruct) [[unlikely]] { + return unexpected({ + .kind = ErrorKind::kJsonParseError, + .message = std::format("Schema must be a struct type, but got {}", json.dump()), + }); + } + + auto& struct_type = static_cast(*type); + return FromStructType(std::move(struct_type), schema_id); +} + } // namespace iceberg diff --git a/src/iceberg/json_internal.h b/src/iceberg/json_internal.h index a05bf3eac..c23eec743 100644 --- a/src/iceberg/json_internal.h +++ b/src/iceberg/json_internal.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include @@ -28,6 +29,7 @@ #include "iceberg/type_fwd.h" namespace iceberg { + /// \brief Serializes a `SortField` object to JSON. /// /// This function converts a `SortField` object into a JSON representation. @@ -70,4 +72,40 @@ expected, Error> SortFieldFromJson(const nlohmann::js /// JSON is malformed or missing expected fields, an error will be returned. expected, Error> SortOrderFromJson(const nlohmann::json& json); +/// \brief Convert an Iceberg Schema to JSON. +/// +/// \param[in] schema The Iceberg schema to convert. +/// \return The JSON representation of the schema. +nlohmann::json SchemaToJson(const Schema& schema); + +/// \brief Convert an Iceberg Type to JSON. +/// +/// \param[in] type The Iceberg type to convert. +/// \return The JSON representation of the type. +nlohmann::json TypeToJson(const Type& type); + +/// \brief Convert an Iceberg SchemaField to JSON. +/// +/// \param[in] field The Iceberg field to convert. +/// \return The JSON representation of the field. +nlohmann::json FieldToJson(const SchemaField& field); + +/// \brief Convert JSON to an Iceberg Schema. +/// +/// \param[in] json The JSON representation of the schema. +/// \return The Iceberg schema or an error if the conversion fails. +expected, Error> SchemaFromJson(const nlohmann::json& json); + +/// \brief Convert JSON to an Iceberg Type. +/// +/// \param[in] json The JSON representation of the type. +/// \return The Iceberg type or an error if the conversion fails. +expected, Error> TypeFromJson(const nlohmann::json& json); + +/// \brief Convert JSON to an Iceberg SchemaField. +/// +/// \param[in] json The JSON representation of the field. +/// \return The Iceberg field or an error if the conversion fails. +expected, Error> FieldFromJson(const nlohmann::json& json); + } // namespace iceberg diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc index 741c4d7d7..5a9620e57 100644 --- a/src/iceberg/schema_internal.cc +++ b/src/iceberg/schema_internal.cc @@ -22,15 +22,11 @@ #include #include #include -#include #include -#include - #include "iceberg/expected.h" #include "iceberg/schema.h" #include "iceberg/type.h" -#include "iceberg/util/macros.h" namespace iceberg { @@ -42,29 +38,6 @@ constexpr const char* kArrowExtensionMetadata = "ARROW:extension:metadata"; constexpr const char* kArrowUuidExtensionName = "arrow.uuid"; constexpr int32_t kUnknownFieldId = -1; -// Constants for schema json serialization -constexpr std::string_view kSchemaId = "schema-id"; -constexpr std::string_view kIdentifierFieldIds = "identifier-field-ids"; -constexpr std::string_view kType = "type"; -constexpr std::string_view kStruct = "struct"; -constexpr std::string_view kList = "list"; -constexpr std::string_view kMap = "map"; -constexpr std::string_view kFields = "fields"; -constexpr std::string_view kElement = "element"; -constexpr std::string_view kKey = "key"; -constexpr std::string_view kValue = "value"; -constexpr std::string_view kDoc = "doc"; -constexpr std::string_view kName = "name"; -constexpr std::string_view kId = "id"; -constexpr std::string_view kInitialDefault = "initial-default"; -constexpr std::string_view kWriteDefault = "write-default"; -constexpr std::string_view kElementId = "element-id"; -constexpr std::string_view kKeyId = "key-id"; -constexpr std::string_view kValueId = "value-id"; -constexpr std::string_view kRequired = "required"; -constexpr std::string_view kElementRequired = "element-required"; -constexpr std::string_view kValueRequired = "value-required"; - // Convert an Iceberg type to Arrow schema. Return value is Nanoarrow error code. ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view name, std::optional field_id, ArrowSchema* schema) { @@ -357,6 +330,8 @@ expected, Error> FromArrowSchema(const ArrowSchema& schema } } +} // namespace + std::unique_ptr FromStructType(StructType&& struct_type, int32_t schema_id) { std::vector fields; fields.reserve(struct_type.fields().size()); @@ -366,8 +341,6 @@ std::unique_ptr FromStructType(StructType&& struct_type, int32_t schema_ return std::make_unique(schema_id, std::move(fields)); } -} // namespace - expected, Error> FromArrowSchema(const ArrowSchema& schema, int32_t schema_id) { auto type_result = FromArrowSchema(schema); @@ -386,259 +359,4 @@ expected, Error> FromArrowSchema(const ArrowSchema& sche return FromStructType(std::move(struct_type), schema_id); } -nlohmann::json FieldToJson(const SchemaField& field) { - nlohmann::json json; - json[kId] = field.field_id(); - json[kName] = field.name(); - json[kRequired] = !field.optional(); - json[kType] = TypeToJson(*field.type()); - return json; -} - -nlohmann::json TypeToJson(const Type& type) { - switch (type.type_id()) { - case TypeId::kStruct: { - const auto& struct_type = static_cast(type); - nlohmann::json json; - json[kType] = kStruct; - nlohmann::json fields_json = nlohmann::json::array(); - for (const auto& field : struct_type.fields()) { - fields_json.push_back(FieldToJson(field)); - // TODO(gangwu): add default values - } - json[kFields] = fields_json; - return json; - } - case TypeId::kList: { - const auto& list_type = static_cast(type); - nlohmann::json json; - json[kType] = kList; - - const auto& element_field = list_type.fields().front(); - json[kElementId] = element_field.field_id(); - json[kElementRequired] = !element_field.optional(); - json[kElement] = TypeToJson(*element_field.type()); - return json; - } - case TypeId::kMap: { - const auto& map_type = static_cast(type); - nlohmann::json json; - json[std::string(kType)] = kMap; - - const auto& key_field = map_type.key(); - json[kKeyId] = key_field.field_id(); - json[kKey] = TypeToJson(*key_field.type()); - - const auto& value_field = map_type.value(); - json[kValueId] = value_field.field_id(); - json[kValueRequired] = !value_field.optional(); - json[kValue] = TypeToJson(*value_field.type()); - return json; - } - case TypeId::kBoolean: - return "boolean"; - case TypeId::kInt: - return "int"; - case TypeId::kLong: - return "long"; - case TypeId::kFloat: - return "float"; - case TypeId::kDouble: - return "double"; - case TypeId::kDecimal: { - const auto& decimal_type = static_cast(type); - return std::format("decimal({},{})", decimal_type.precision(), - decimal_type.scale()); - } - case TypeId::kDate: - return "date"; - case TypeId::kTime: - return "time"; - case TypeId::kTimestamp: - return "timestamp"; - case TypeId::kTimestampTz: - return "timestamptz"; - case TypeId::kString: - return "string"; - case TypeId::kBinary: - return "binary"; - case TypeId::kFixed: { - const auto& fixed_type = static_cast(type); - return std::format("fixed[{}]", fixed_type.length()); - } - case TypeId::kUuid: - return "uuid"; - } -} - -nlohmann::json SchemaToJson(const Schema& schema) { - nlohmann::json json = TypeToJson(static_cast(schema)); - json[kSchemaId] = schema.schema_id(); - // TODO(gangwu): add identifier-field-ids. - return json; -} - -namespace { - -#define ICEBERG_CHECK_JSON_FIELD(field_name, json) \ - if (!json.contains(field_name)) [[unlikely]] { \ - return unexpected({ \ - .kind = ErrorKind::kJsonParseError, \ - .message = std::format("Missing '{}' in {}", field_name, json.dump()), \ - }); \ - } - -expected, Error> StructTypeFromJson(const nlohmann::json& json) { - ICEBERG_CHECK_JSON_FIELD(kFields, json); - - std::vector fields; - for (const auto& field_json : json[kFields]) { - ICEBERG_ASSIGN_OR_RAISE(auto field, FieldFromJson(field_json)); - fields.emplace_back(std::move(*field)); - } - - return std::make_unique(std::move(fields)); -} - -expected, Error> ListTypeFromJson(const nlohmann::json& json) { - ICEBERG_CHECK_JSON_FIELD(kElement, json); - ICEBERG_CHECK_JSON_FIELD(kElementId, json); - ICEBERG_CHECK_JSON_FIELD(kElementRequired, json); - - ICEBERG_ASSIGN_OR_RAISE(auto element_type, TypeFromJson(json[kElement])); - int32_t element_id = json[kElementId].get(); - bool element_required = json[kElementRequired].get(); - - return std::make_unique( - SchemaField(element_id, std::string(ListType::kElementName), - std::move(element_type), !element_required)); -} - -expected, Error> MapTypeFromJson(const nlohmann::json& json) { - ICEBERG_CHECK_JSON_FIELD(kKey, json); - ICEBERG_CHECK_JSON_FIELD(kValue, json); - ICEBERG_CHECK_JSON_FIELD(kKeyId, json); - ICEBERG_CHECK_JSON_FIELD(kValueId, json); - ICEBERG_CHECK_JSON_FIELD(kValueRequired, json); - - ICEBERG_ASSIGN_OR_RAISE(auto key_type, TypeFromJson(json[kKey])); - ICEBERG_ASSIGN_OR_RAISE(auto value_type, TypeFromJson(json[kValue])); - int32_t key_id = json[kKeyId].get(); - int32_t value_id = json[kValueId].get(); - bool value_required = json[kValueRequired].get(); - - SchemaField key_field(key_id, std::string(MapType::kKeyName), std::move(key_type), - /*optional=*/false); - SchemaField value_field(value_id, std::string(MapType::kValueName), - std::move(value_type), !value_required); - return std::make_unique(std::move(key_field), std::move(value_field)); -} - -} // namespace - -expected, Error> TypeFromJson(const nlohmann::json& json) { - if (json.is_string()) { - std::string type_str = json.get(); - if (type_str == "boolean") { - return std::make_unique(); - } else if (type_str == "int") { - return std::make_unique(); - } else if (type_str == "long") { - return std::make_unique(); - } else if (type_str == "float") { - return std::make_unique(); - } else if (type_str == "double") { - return std::make_unique(); - } else if (type_str == "date") { - return std::make_unique(); - } else if (type_str == "time") { - return std::make_unique(); - } else if (type_str == "timestamp") { - return std::make_unique(); - } else if (type_str == "timestamptz") { - return std::make_unique(); - } else if (type_str == "string") { - return std::make_unique(); - } else if (type_str == "binary") { - return std::make_unique(); - } else if (type_str == "uuid") { - return std::make_unique(); - } else if (type_str.starts_with("fixed")) { - std::regex fixed_regex(R"(fixed\[\s*(\d+)\s*\])"); - std::smatch match; - if (std::regex_match(type_str, match, fixed_regex)) { - return std::make_unique(std::stoi(match[1].str())); - } - return unexpected({ - .kind = ErrorKind::kJsonParseError, - .message = std::format("Invalid fixed type: {}", type_str), - }); - } else if (type_str.starts_with("decimal")) { - std::regex decimal_regex(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))"); - std::smatch match; - if (std::regex_match(type_str, match, decimal_regex)) { - return std::make_unique(std::stoi(match[1].str()), - std::stoi(match[2].str())); - } - return unexpected({ - .kind = ErrorKind::kJsonParseError, - .message = std::format("Invalid decimal type: {}", type_str), - }); - } else { - return unexpected({ - .kind = ErrorKind::kJsonParseError, - .message = std::format("Unknown primitive type: {}", type_str), - }); - } - } - - // For complex types like struct, list, and map - ICEBERG_CHECK_JSON_FIELD(kType, json); - std::string type_str = json[kType].get(); - if (type_str == kStruct) { - return StructTypeFromJson(json); - } else if (type_str == kList) { - return ListTypeFromJson(json); - } else if (type_str == kMap) { - return MapTypeFromJson(json); - } else { - return unexpected({ - .kind = ErrorKind::kJsonParseError, - .message = std::format("Unknown complex type: {}", type_str), - }); - } -} - -expected, Error> FieldFromJson(const nlohmann::json& json) { - ICEBERG_CHECK_JSON_FIELD(kId, json); - ICEBERG_CHECK_JSON_FIELD(kName, json); - ICEBERG_CHECK_JSON_FIELD(kType, json); - ICEBERG_CHECK_JSON_FIELD(kRequired, json); - - ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json[kType])); - int32_t field_id = json[kId].get(); - std::string name = json[kName].get(); - bool required = json[kRequired].get(); - - return std::make_unique(field_id, std::move(name), std::move(type), - !required); -} - -expected, Error> SchemaFromJson(const nlohmann::json& json) { - ICEBERG_CHECK_JSON_FIELD(kType, json); - ICEBERG_CHECK_JSON_FIELD(kSchemaId, json); - - ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json)); - if (type->type_id() != TypeId::kStruct) [[unlikely]] { - return unexpected({ - .kind = ErrorKind::kJsonParseError, - .message = std::format("Schema must be a struct type, but got {}", json.dump()), - }); - } - - int32_t schema_id = json[kSchemaId].get(); - auto& struct_type = static_cast(*type); - return FromStructType(std::move(struct_type), schema_id); -} - } // namespace iceberg diff --git a/src/iceberg/schema_internal.h b/src/iceberg/schema_internal.h index 6cb2daa02..ab7f1ab1f 100644 --- a/src/iceberg/schema_internal.h +++ b/src/iceberg/schema_internal.h @@ -22,7 +22,6 @@ #include #include -#include #include "iceberg/error.h" #include "iceberg/expected.h" @@ -50,40 +49,11 @@ expected ToArrowSchema(const Schema& schema, ArrowSchema* out); expected, Error> FromArrowSchema(const ArrowSchema& schema, int32_t schema_id); -/// \brief Convert an Iceberg Schema to JSON. +/// \brief Convert a struct type to an Iceberg schema. /// -/// \param[in] schema The Iceberg schema to convert. -/// \return The JSON representation of the schema. -nlohmann::json SchemaToJson(const Schema& schema); - -/// \brief Convert an Iceberg Type to JSON. -/// -/// \param[in] type The Iceberg type to convert. -/// \return The JSON representation of the type. -nlohmann::json TypeToJson(const Type& type); - -/// \brief Convert an Iceberg SchemaField to JSON. -/// -/// \param[in] field The Iceberg field to convert. -/// \return The JSON representation of the field. -nlohmann::json FieldToJson(const SchemaField& field); - -/// \brief Convert JSON to an Iceberg Schema. -/// -/// \param[in] json The JSON representation of the schema. -/// \return The Iceberg schema or an error if the conversion fails. -expected, Error> SchemaFromJson(const nlohmann::json& json); - -/// \brief Convert JSON to an Iceberg Type. -/// -/// \param[in] json The JSON representation of the type. -/// \return The Iceberg type or an error if the conversion fails. -expected, Error> TypeFromJson(const nlohmann::json& json); - -/// \brief Convert JSON to an Iceberg SchemaField. -/// -/// \param[in] json The JSON representation of the field. -/// \return The Iceberg field or an error if the conversion fails. -expected, Error> FieldFromJson(const nlohmann::json& json); +/// \param[in] struct_type The struct type to convert. +/// \param[in] schema_id The schema ID of the Iceberg schema. +/// \return The Iceberg schema. +std::unique_ptr FromStructType(StructType&& struct_type, int32_t schema_id); } // namespace iceberg diff --git a/test/schema_json_test.cc b/test/schema_json_test.cc index d538ebead..32c50da74 100644 --- a/test/schema_json_test.cc +++ b/test/schema_json_test.cc @@ -24,10 +24,9 @@ #include #include -#include "gtest/gtest.h" +#include "iceberg/json_internal.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" -#include "iceberg/schema_internal.h" #include "iceberg/type.h" namespace iceberg {