Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
309 changes: 286 additions & 23 deletions src/iceberg/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,19 @@
#include "iceberg/json_internal.h"

#include <format>
#include <regex>

#include <nlohmann/json.hpp>

#include "iceberg/error.h"
#include "iceberg/expected.h"
#include "iceberg/schema.h"
#include "iceberg/schema_internal.h"
#include "iceberg/sort_order.h"
#include "iceberg/transform.h"
#include "iceberg/type.h"
#include "iceberg/util/formatter.h"
#include "iceberg/util/macros.h"

namespace iceberg {

Expand All @@ -39,26 +46,46 @@ constexpr std::string_view kNullOrder = "null-order";
constexpr std::string_view kOrderId = "order-id";
constexpr std::string_view kFields = "fields";

// --- helper for safe JSON extraction ---
constexpr std::string_view kSchemaId = "schema-id";
constexpr std::string_view kIdentifierFieldIds = "identifier-field-ids";

constexpr std::string_view kType = "type";
constexpr std::string_view kStruct = "struct";
constexpr std::string_view kList = "list";
constexpr std::string_view kMap = "map";
constexpr std::string_view kElement = "element";
constexpr std::string_view kKey = "key";
constexpr std::string_view kValue = "value";
constexpr std::string_view kDoc = "doc";
constexpr std::string_view kName = "name";
constexpr std::string_view kId = "id";
constexpr std::string_view kInitialDefault = "initial-default";
constexpr std::string_view kWriteDefault = "write-default";
constexpr std::string_view kElementId = "element-id";
constexpr std::string_view kKeyId = "key-id";
constexpr std::string_view kValueId = "value-id";
constexpr std::string_view kRequired = "required";
constexpr std::string_view kElementRequired = "element-required";
constexpr std::string_view kValueRequired = "value-required";

template <typename T>
expected<T, Error> GetJsonValue(const nlohmann::json& json, std::string_view key) {
if (!json.contains(key)) {
return unexpected<Error>({.kind = ErrorKind::kInvalidArgument,
.message = "Missing key: " + std::string(key)});
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Missing '{}' in {}", key, json.dump()),
});
}
try {
return json.at(key).get<T>();
} catch (const std::exception& ex) {
return unexpected<Error>({.kind = ErrorKind::kInvalidArgument,
.message = std::string("Failed to parse key: ") +
key.data() + ", " + ex.what()});
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Failed to parse key '{}' in {}", key, json.dump()),
});
}
}

#define TRY_ASSIGN(json_value, expr) \
auto _tmp_##json_value = (expr); \
if (!_tmp_##json_value) return unexpected(_tmp_##json_value.error()); \
auto json_value = std::move(_tmp_##json_value.value());
} // namespace

nlohmann::json ToJson(const SortField& sort_field) {
Expand All @@ -84,29 +111,265 @@ nlohmann::json ToJson(const SortOrder& sort_order) {

expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(
const nlohmann::json& json) {
TRY_ASSIGN(transform_str, GetJsonValue<std::string>(json, kTransform));
TRY_ASSIGN(transform, TransformFunctionFromString(transform_str));
TRY_ASSIGN(source_id, GetJsonValue<int32_t>(json, kSourceId));
TRY_ASSIGN(direction_str, GetJsonValue<std::string>(json, kDirection));
TRY_ASSIGN(direction, SortDirectionFromString(direction_str));
TRY_ASSIGN(null_order_str, GetJsonValue<std::string>(json, kNullOrder));
TRY_ASSIGN(null_order, NullOrderFromString(null_order_str));

ICEBERG_ASSIGN_OR_RAISE(auto source_id, GetJsonValue<int32_t>(json, kSourceId));
ICEBERG_ASSIGN_OR_RAISE(
auto transform,
GetJsonValue<std::string>(json, kTransform).and_then(TransformFunctionFromString));
ICEBERG_ASSIGN_OR_RAISE(
auto direction,
GetJsonValue<std::string>(json, kDirection).and_then(SortDirectionFromString));
ICEBERG_ASSIGN_OR_RAISE(
auto null_order,
GetJsonValue<std::string>(json, kNullOrder).and_then(NullOrderFromString));
return std::make_unique<SortField>(source_id, std::move(transform), direction,
null_order);
}

expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(
const nlohmann::json& json) {
TRY_ASSIGN(order_id, GetJsonValue<int32_t>(json, kOrderId));
ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue<int32_t>(json, kOrderId));
ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue<nlohmann::json>(json, kFields));

std::vector<SortField> sort_fields;
for (const auto& field_json : json.at(kFields)) {
TRY_ASSIGN(sort_field, SortFieldFromJson(field_json));
sort_fields.push_back(*sort_field);
for (const auto& field_json : fields) {
ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json));
sort_fields.push_back(std::move(*sort_field));
}

return std::make_unique<SortOrder>(order_id, std::move(sort_fields));
}

nlohmann::json FieldToJson(const SchemaField& field) {
nlohmann::json json;
json[kId] = field.field_id();
json[kName] = field.name();
json[kRequired] = !field.optional();
json[kType] = TypeToJson(*field.type());
return json;
}

nlohmann::json TypeToJson(const Type& type) {
switch (type.type_id()) {
case TypeId::kStruct: {
const auto& struct_type = static_cast<const StructType&>(type);
nlohmann::json json;
json[kType] = kStruct;
nlohmann::json fields_json = nlohmann::json::array();
for (const auto& field : struct_type.fields()) {
fields_json.push_back(FieldToJson(field));
// TODO(gangwu): add default values
}
json[kFields] = fields_json;
return json;
}
case TypeId::kList: {
const auto& list_type = static_cast<const ListType&>(type);
nlohmann::json json;
json[kType] = kList;

const auto& element_field = list_type.fields().front();
json[kElementId] = element_field.field_id();
json[kElementRequired] = !element_field.optional();
json[kElement] = TypeToJson(*element_field.type());
return json;
}
case TypeId::kMap: {
const auto& map_type = static_cast<const MapType&>(type);
nlohmann::json json;
json[std::string(kType)] = kMap;

const auto& key_field = map_type.key();
json[kKeyId] = key_field.field_id();
json[kKey] = TypeToJson(*key_field.type());

const auto& value_field = map_type.value();
json[kValueId] = value_field.field_id();
json[kValueRequired] = !value_field.optional();
json[kValue] = TypeToJson(*value_field.type());
return json;
}
case TypeId::kBoolean:
return "boolean";
case TypeId::kInt:
return "int";
case TypeId::kLong:
return "long";
case TypeId::kFloat:
return "float";
case TypeId::kDouble:
return "double";
case TypeId::kDecimal: {
const auto& decimal_type = static_cast<const DecimalType&>(type);
return std::format("decimal({},{})", decimal_type.precision(),
decimal_type.scale());
}
case TypeId::kDate:
return "date";
case TypeId::kTime:
return "time";
case TypeId::kTimestamp:
return "timestamp";
case TypeId::kTimestampTz:
return "timestamptz";
case TypeId::kString:
return "string";
case TypeId::kBinary:
return "binary";
case TypeId::kFixed: {
const auto& fixed_type = static_cast<const FixedType&>(type);
return std::format("fixed[{}]", fixed_type.length());
}
case TypeId::kUuid:
return "uuid";
}
}

nlohmann::json SchemaToJson(const Schema& schema) {
nlohmann::json json = TypeToJson(static_cast<const Type&>(schema));
json[kSchemaId] = schema.schema_id();
// TODO(gangwu): add identifier-field-ids.
return json;
}

namespace {

expected<std::unique_ptr<Type>, Error> StructTypeFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(auto json_fields, GetJsonValue<nlohmann::json>(json, kFields));

std::vector<SchemaField> fields;
for (const auto& field_json : json_fields) {
ICEBERG_ASSIGN_OR_RAISE(auto field, FieldFromJson(field_json));
fields.emplace_back(std::move(*field));
}

return std::make_unique<StructType>(std::move(fields));
}

expected<std::unique_ptr<Type>, Error> ListTypeFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(auto element_type, TypeFromJson(json[kElement]));
ICEBERG_ASSIGN_OR_RAISE(auto element_id, GetJsonValue<int32_t>(json, kElementId));
ICEBERG_ASSIGN_OR_RAISE(auto element_required,
GetJsonValue<bool>(json, kElementRequired));

return std::make_unique<ListType>(
SchemaField(element_id, std::string(ListType::kElementName),
std::move(element_type), !element_required));
}

expected<std::unique_ptr<Type>, Error> MapTypeFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(
auto key_type, GetJsonValue<nlohmann::json>(json, kKey).and_then(TypeFromJson));
ICEBERG_ASSIGN_OR_RAISE(
auto value_type, GetJsonValue<nlohmann::json>(json, kValue).and_then(TypeFromJson));

ICEBERG_ASSIGN_OR_RAISE(auto key_id, GetJsonValue<int32_t>(json, kKeyId));
ICEBERG_ASSIGN_OR_RAISE(auto value_id, GetJsonValue<int32_t>(json, kValueId));
ICEBERG_ASSIGN_OR_RAISE(auto value_required, GetJsonValue<bool>(json, kValueRequired));

SchemaField key_field(key_id, std::string(MapType::kKeyName), std::move(key_type),
/*optional=*/false);
SchemaField value_field(value_id, std::string(MapType::kValueName),
std::move(value_type), !value_required);
return std::make_unique<MapType>(std::move(key_field), std::move(value_field));
}

} // namespace

expected<std::unique_ptr<Type>, Error> TypeFromJson(const nlohmann::json& json) {
if (json.is_string()) {
std::string type_str = json.get<std::string>();
if (type_str == "boolean") {
return std::make_unique<BooleanType>();
} else if (type_str == "int") {
return std::make_unique<IntType>();
} else if (type_str == "long") {
return std::make_unique<LongType>();
} else if (type_str == "float") {
return std::make_unique<FloatType>();
} else if (type_str == "double") {
return std::make_unique<DoubleType>();
} else if (type_str == "date") {
return std::make_unique<DateType>();
} else if (type_str == "time") {
return std::make_unique<TimeType>();
} else if (type_str == "timestamp") {
return std::make_unique<TimestampType>();
} else if (type_str == "timestamptz") {
return std::make_unique<TimestampTzType>();
} else if (type_str == "string") {
return std::make_unique<StringType>();
} else if (type_str == "binary") {
return std::make_unique<BinaryType>();
} else if (type_str == "uuid") {
return std::make_unique<UuidType>();
} else if (type_str.starts_with("fixed")) {
std::regex fixed_regex(R"(fixed\[\s*(\d+)\s*\])");
std::smatch match;
if (std::regex_match(type_str, match, fixed_regex)) {
return std::make_unique<FixedType>(std::stoi(match[1].str()));
}
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Invalid fixed type: {}", type_str),
});
} else if (type_str.starts_with("decimal")) {
std::regex decimal_regex(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))");
std::smatch match;
if (std::regex_match(type_str, match, decimal_regex)) {
return std::make_unique<DecimalType>(std::stoi(match[1].str()),
std::stoi(match[2].str()));
}
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Invalid decimal type: {}", type_str),
});
} else {
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Unknown primitive type: {}", type_str),
});
}
}

// For complex types like struct, list, and map
ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue<std::string>(json, kType));
if (type_str == kStruct) {
return StructTypeFromJson(json);
} else if (type_str == kList) {
return ListTypeFromJson(json);
} else if (type_str == kMap) {
return MapTypeFromJson(json);
} else {
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Unknown complex type: {}", type_str),
});
}
}

expected<std::unique_ptr<SchemaField>, Error> FieldFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(
auto type, GetJsonValue<nlohmann::json>(json, kType).and_then(TypeFromJson));
ICEBERG_ASSIGN_OR_RAISE(auto field_id, GetJsonValue<int32_t>(json, kId));
ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue<std::string>(json, kName));
ICEBERG_ASSIGN_OR_RAISE(auto required, GetJsonValue<bool>(json, kRequired));

return std::make_unique<SchemaField>(field_id, std::move(name), std::move(type),
!required);
}

expected<std::unique_ptr<Schema>, Error> SchemaFromJson(const nlohmann::json& json) {
ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValue<int32_t>(json, kSchemaId));
ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json));

if (type->type_id() != TypeId::kStruct) [[unlikely]] {
return unexpected<Error>({
.kind = ErrorKind::kJsonParseError,
.message = std::format("Schema must be a struct type, but got {}", json.dump()),
});
}

auto& struct_type = static_cast<StructType&>(*type);
return FromStructType(std::move(struct_type), schema_id);
}

} // namespace iceberg
Loading
Loading