2020#include " iceberg/json_internal.h"
2121
2222#include < format>
23+ #include < regex>
2324
2425#include < nlohmann/json.hpp>
2526
27+ #include " iceberg/error.h"
28+ #include " iceberg/expected.h"
29+ #include " iceberg/schema.h"
30+ #include " iceberg/schema_internal.h"
2631#include " iceberg/sort_order.h"
2732#include " iceberg/transform.h"
33+ #include " iceberg/type.h"
2834#include " iceberg/util/formatter.h"
35+ #include " iceberg/util/macros.h"
2936
3037namespace iceberg {
3138
@@ -39,26 +46,46 @@ constexpr std::string_view kNullOrder = "null-order";
// Keys of a serialized sort order. kFields is also the key under which a
// struct type stores its nested fields.
constexpr std::string_view kOrderId = "order-id";
constexpr std::string_view kFields = "fields";

// Schema-level keys.
constexpr std::string_view kSchemaId = "schema-id";
constexpr std::string_view kIdentifierFieldIds = "identifier-field-ids";

// Discriminator key of a nested type and the values it is compared against.
constexpr std::string_view kType = "type";
constexpr std::string_view kStruct = "struct";
constexpr std::string_view kList = "list";
constexpr std::string_view kMap = "map";

// Keys of a single field.
constexpr std::string_view kId = "id";
constexpr std::string_view kName = "name";
constexpr std::string_view kRequired = "required";

// Declared for completeness; not referenced by the code visible in this file yet.
constexpr std::string_view kDoc = "doc";
constexpr std::string_view kInitialDefault = "initial-default";
constexpr std::string_view kWriteDefault = "write-default";

// Keys of a list type.
constexpr std::string_view kElement = "element";
constexpr std::string_view kElementId = "element-id";
constexpr std::string_view kElementRequired = "element-required";

// Keys of a map type.
constexpr std::string_view kKey = "key";
constexpr std::string_view kKeyId = "key-id";
constexpr std::string_view kValue = "value";
constexpr std::string_view kValueId = "value-id";
constexpr std::string_view kValueRequired = "value-required";
70+
// Reads the value stored under `key` in `json` and converts it to `T`.
//
// Returns a kJsonParseError when `key` is absent, or when the lookup or the
// conversion to `T` throws. The message of the caught exception is appended to
// the error so the reason for the failure is not lost.
template <typename T>
expected<T, Error> GetJsonValue(const nlohmann::json& json, std::string_view key) {
  if (!json.contains(key)) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Missing '{}' in {}", key, json.dump()),
    });
  }
  try {
    return json.at(key).get<T>();
  } catch (const std::exception& ex) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Failed to parse key '{}' in {}: {}", key, json.dump(),
                               ex.what()),
    });
  }
}
5788
58- #define TRY_ASSIGN (json_value, expr ) \
59- auto _tmp_##json_value = (expr); \
60- if (!_tmp_##json_value) return unexpected(_tmp_##json_value.error()); \
61- auto json_value = std::move(_tmp_##json_value.value());
6289} // namespace
6390
6491nlohmann::json ToJson (const SortField& sort_field) {
@@ -84,29 +111,265 @@ nlohmann::json ToJson(const SortOrder& sort_order) {
84111
85112expected<std::unique_ptr<SortField>, Error> SortFieldFromJson (
86113 const nlohmann::json& json) {
87- TRY_ASSIGN (transform_str, GetJsonValue<std::string>(json, kTransform ));
88- TRY_ASSIGN (transform, TransformFunctionFromString (transform_str));
89- TRY_ASSIGN (source_id, GetJsonValue<int32_t >(json, kSourceId ));
90- TRY_ASSIGN (direction_str, GetJsonValue<std::string>(json, kDirection ));
91- TRY_ASSIGN (direction, SortDirectionFromString (direction_str));
92- TRY_ASSIGN (null_order_str, GetJsonValue<std::string>(json, kNullOrder ));
93- TRY_ASSIGN (null_order, NullOrderFromString (null_order_str));
94-
114+ ICEBERG_ASSIGN_OR_RAISE (auto source_id, GetJsonValue<int32_t >(json, kSourceId ));
115+ ICEBERG_ASSIGN_OR_RAISE (
116+ auto transform,
117+ GetJsonValue<std::string>(json, kTransform ).and_then (TransformFunctionFromString));
118+ ICEBERG_ASSIGN_OR_RAISE (
119+ auto direction,
120+ GetJsonValue<std::string>(json, kDirection ).and_then (SortDirectionFromString));
121+ ICEBERG_ASSIGN_OR_RAISE (
122+ auto null_order,
123+ GetJsonValue<std::string>(json, kNullOrder ).and_then (NullOrderFromString));
95124 return std::make_unique<SortField>(source_id, std::move (transform), direction,
96125 null_order);
97126}
98127
99128expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson (
100129 const nlohmann::json& json) {
101- TRY_ASSIGN (order_id, GetJsonValue<int32_t >(json, kOrderId ));
130+ ICEBERG_ASSIGN_OR_RAISE (auto order_id, GetJsonValue<int32_t >(json, kOrderId ));
131+ ICEBERG_ASSIGN_OR_RAISE (auto fields, GetJsonValue<nlohmann::json>(json, kFields ));
102132
103133 std::vector<SortField> sort_fields;
104- for (const auto & field_json : json. at ( kFields ) ) {
105- TRY_ASSIGN ( sort_field, SortFieldFromJson (field_json));
106- sort_fields.push_back (*sort_field);
134+ for (const auto & field_json : fields ) {
135+ ICEBERG_ASSIGN_OR_RAISE ( auto sort_field, SortFieldFromJson (field_json));
136+ sort_fields.push_back (std::move ( *sort_field) );
107137 }
108-
109138 return std::make_unique<SortOrder>(order_id, std::move (sort_fields));
110139}
111140
141+ nlohmann::json FieldToJson (const SchemaField& field) {
142+ nlohmann::json json;
143+ json[kId ] = field.field_id ();
144+ json[kName ] = field.name ();
145+ json[kRequired ] = !field.optional ();
146+ json[kType ] = TypeToJson (*field.type ());
147+ return json;
148+ }
149+
// Serializes a type to JSON.
//
// Struct, list and map types become JSON objects tagged with the type key;
// every other type becomes a JSON string such as "int", "decimal(P,S)" or
// "fixed[L]". Nested types are serialized recursively.
nlohmann::json TypeToJson(const Type& type) {
  switch (type.type_id()) {
    case TypeId::kStruct: {
      const auto& struct_type = static_cast<const StructType&>(type);
      nlohmann::json fields_json = nlohmann::json::array();
      for (const auto& field : struct_type.fields()) {
        fields_json.push_back(FieldToJson(field));
        // TODO(gangwu): add default values
      }
      nlohmann::json json;
      json[kType] = kStruct;
      // Hand the array over instead of copying every serialized field.
      json[kFields] = std::move(fields_json);
      return json;
    }
    case TypeId::kList: {
      const auto& list_type = static_cast<const ListType&>(type);
      // The element is taken from the first entry of fields().
      const auto& element_field = list_type.fields().front();

      nlohmann::json json;
      json[kType] = kList;
      json[kElementId] = element_field.field_id();
      json[kElementRequired] = !element_field.optional();
      json[kElement] = TypeToJson(*element_field.type());
      return json;
    }
    case TypeId::kMap: {
      const auto& map_type = static_cast<const MapType&>(type);
      const auto& key_field = map_type.key();
      const auto& value_field = map_type.value();

      nlohmann::json json;
      json[kType] = kMap;
      // No required flag is written for the key, only for the value.
      json[kKeyId] = key_field.field_id();
      json[kKey] = TypeToJson(*key_field.type());
      json[kValueId] = value_field.field_id();
      json[kValueRequired] = !value_field.optional();
      json[kValue] = TypeToJson(*value_field.type());
      return json;
    }
    case TypeId::kBoolean:
      return "boolean";
    case TypeId::kInt:
      return "int";
    case TypeId::kLong:
      return "long";
    case TypeId::kFloat:
      return "float";
    case TypeId::kDouble:
      return "double";
    case TypeId::kDecimal: {
      const auto& decimal_type = static_cast<const DecimalType&>(type);
      return std::format("decimal({},{})", decimal_type.precision(),
                         decimal_type.scale());
    }
    case TypeId::kDate:
      return "date";
    case TypeId::kTime:
      return "time";
    case TypeId::kTimestamp:
      return "timestamp";
    case TypeId::kTimestampTz:
      return "timestamptz";
    case TypeId::kString:
      return "string";
    case TypeId::kBinary:
      return "binary";
    case TypeId::kFixed: {
      const auto& fixed_type = static_cast<const FixedType&>(type);
      return std::format("fixed[{}]", fixed_type.length());
    }
    case TypeId::kUuid:
      return "uuid";
  }
  // Only reached when type_id() yields a value that matches none of the cases
  // above. Return JSON null instead of flowing off the end of a non-void
  // function, which is undefined behavior.
  return nlohmann::json(nullptr);
}
225+
226+ nlohmann::json SchemaToJson (const Schema& schema) {
227+ nlohmann::json json = TypeToJson (static_cast <const Type&>(schema));
228+ json[kSchemaId ] = schema.schema_id ();
229+ // TODO(gangwu): add identifier-field-ids.
230+ return json;
231+ }
232+
233+ namespace {
234+
235+ expected<std::unique_ptr<Type>, Error> StructTypeFromJson (const nlohmann::json& json) {
236+ ICEBERG_ASSIGN_OR_RAISE (auto json_fields, GetJsonValue<nlohmann::json>(json, kFields ));
237+
238+ std::vector<SchemaField> fields;
239+ for (const auto & field_json : json_fields) {
240+ ICEBERG_ASSIGN_OR_RAISE (auto field, FieldFromJson (field_json));
241+ fields.emplace_back (std::move (*field));
242+ }
243+
244+ return std::make_unique<StructType>(std::move (fields));
245+ }
246+
247+ expected<std::unique_ptr<Type>, Error> ListTypeFromJson (const nlohmann::json& json) {
248+ ICEBERG_ASSIGN_OR_RAISE (auto element_type, TypeFromJson (json[kElement ]));
249+ ICEBERG_ASSIGN_OR_RAISE (auto element_id, GetJsonValue<int32_t >(json, kElementId ));
250+ ICEBERG_ASSIGN_OR_RAISE (auto element_required,
251+ GetJsonValue<bool >(json, kElementRequired ));
252+
253+ return std::make_unique<ListType>(
254+ SchemaField (element_id, std::string (ListType::kElementName ),
255+ std::move (element_type), !element_required));
256+ }
257+
258+ expected<std::unique_ptr<Type>, Error> MapTypeFromJson (const nlohmann::json& json) {
259+ ICEBERG_ASSIGN_OR_RAISE (
260+ auto key_type, GetJsonValue<nlohmann::json>(json, kKey ).and_then (TypeFromJson));
261+ ICEBERG_ASSIGN_OR_RAISE (
262+ auto value_type, GetJsonValue<nlohmann::json>(json, kValue ).and_then (TypeFromJson));
263+
264+ ICEBERG_ASSIGN_OR_RAISE (auto key_id, GetJsonValue<int32_t >(json, kKeyId ));
265+ ICEBERG_ASSIGN_OR_RAISE (auto value_id, GetJsonValue<int32_t >(json, kValueId ));
266+ ICEBERG_ASSIGN_OR_RAISE (auto value_required, GetJsonValue<bool >(json, kValueRequired ));
267+
268+ SchemaField key_field (key_id, std::string (MapType::kKeyName ), std::move (key_type),
269+ /* optional=*/ false );
270+ SchemaField value_field (value_id, std::string (MapType::kValueName ),
271+ std::move (value_type), !value_required);
272+ return std::make_unique<MapType>(std::move (key_field), std::move (value_field));
273+ }
274+
275+ } // namespace
276+
namespace {

// Converts a run of decimal digits to int. Returns false instead of letting
// std::stoi's exception escape when the number does not fit in an int.
bool ParseDigits(const std::string& digits, int& out) {
  try {
    out = std::stoi(digits);
    return true;
  } catch (const std::exception&) {
    return false;
  }
}

}  // namespace

// Parses a type from JSON.
//
// A JSON string is treated as a primitive type name ("int", "fixed[16]",
// "decimal(9,2)", ...). Anything else must carry a type key whose value is
// "struct", "list" or "map". Every failure, including a fixed length or
// decimal precision/scale too large for an int, is reported as a
// kJsonParseError rather than an exception.
expected<std::unique_ptr<Type>, Error> TypeFromJson(const nlohmann::json& json) {
  if (json.is_string()) {
    const std::string name = json.get<std::string>();
    if (name == "boolean") return std::make_unique<BooleanType>();
    if (name == "int") return std::make_unique<IntType>();
    if (name == "long") return std::make_unique<LongType>();
    if (name == "float") return std::make_unique<FloatType>();
    if (name == "double") return std::make_unique<DoubleType>();
    if (name == "date") return std::make_unique<DateType>();
    if (name == "time") return std::make_unique<TimeType>();
    if (name == "timestamp") return std::make_unique<TimestampType>();
    if (name == "timestamptz") return std::make_unique<TimestampTzType>();
    if (name == "string") return std::make_unique<StringType>();
    if (name == "binary") return std::make_unique<BinaryType>();
    if (name == "uuid") return std::make_unique<UuidType>();

    if (name.starts_with("fixed")) {
      // Compiled once; constructing a std::regex on every call is costly.
      static const std::regex kFixedPattern(R"(fixed\[\s*(\d+)\s*\])");
      std::smatch match;
      int length = 0;
      if (std::regex_match(name, match, kFixedPattern) &&
          ParseDigits(match[1].str(), length)) {
        return std::make_unique<FixedType>(length);
      }
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Invalid fixed type: {}", name),
      });
    }

    if (name.starts_with("decimal")) {
      static const std::regex kDecimalPattern(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))");
      std::smatch match;
      int precision = 0;
      int scale = 0;
      if (std::regex_match(name, match, kDecimalPattern) &&
          ParseDigits(match[1].str(), precision) &&
          ParseDigits(match[2].str(), scale)) {
        return std::make_unique<DecimalType>(precision, scale);
      }
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Invalid decimal type: {}", name),
      });
    }

    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Unknown primitive type: {}", name),
    });
  }

  // For complex types like struct, list, and map
  ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue<std::string>(json, kType));
  if (type_str == kStruct) {
    return StructTypeFromJson(json);
  }
  if (type_str == kList) {
    return ListTypeFromJson(json);
  }
  if (type_str == kMap) {
    return MapTypeFromJson(json);
  }
  return unexpected<Error>({
      .kind = ErrorKind::kJsonParseError,
      .message = std::format("Unknown complex type: {}", type_str),
  });
}
348+
349+ expected<std::unique_ptr<SchemaField>, Error> FieldFromJson (const nlohmann::json& json) {
350+ ICEBERG_ASSIGN_OR_RAISE (
351+ auto type, GetJsonValue<nlohmann::json>(json, kType ).and_then (TypeFromJson));
352+ ICEBERG_ASSIGN_OR_RAISE (auto field_id, GetJsonValue<int32_t >(json, kId ));
353+ ICEBERG_ASSIGN_OR_RAISE (auto name, GetJsonValue<std::string>(json, kName ));
354+ ICEBERG_ASSIGN_OR_RAISE (auto required, GetJsonValue<bool >(json, kRequired ));
355+
356+ return std::make_unique<SchemaField>(field_id, std::move (name), std::move (type),
357+ !required);
358+ }
359+
// Parses a schema. The schema id is read first, then the whole object is
// parsed as a type; anything other than a struct type is rejected with a
// kJsonParseError. The parsed struct is handed to FromStructType together with
// the schema id.
expected<std::unique_ptr<Schema>, Error> SchemaFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValue<int32_t>(json, kSchemaId));
  ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json));

  if (type->type_id() == TypeId::kStruct) [[likely]] {
    return FromStructType(std::move(static_cast<StructType&>(*type)), schema_id);
  }

  return unexpected<Error>({
      .kind = ErrorKind::kJsonParseError,
      .message = std::format("Schema must be a struct type, but got {}", json.dump()),
  });
}
374+
112375} // namespace iceberg
0 commit comments