Skip to content

Commit 22adac2

Browse files
authored
refactor: consolidate json utility (#66)
1 parent 4a5fe91 commit 22adac2

File tree

5 files changed

+332
-344
lines changed

5 files changed

+332
-344
lines changed

src/iceberg/json_internal.cc

Lines changed: 286 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,19 @@
2020
#include "iceberg/json_internal.h"
2121

2222
#include <format>
23+
#include <regex>
2324

2425
#include <nlohmann/json.hpp>
2526

27+
#include "iceberg/error.h"
28+
#include "iceberg/expected.h"
29+
#include "iceberg/schema.h"
30+
#include "iceberg/schema_internal.h"
2631
#include "iceberg/sort_order.h"
2732
#include "iceberg/transform.h"
33+
#include "iceberg/type.h"
2834
#include "iceberg/util/formatter.h"
35+
#include "iceberg/util/macros.h"
2936

3037
namespace iceberg {
3138

@@ -39,26 +46,46 @@ constexpr std::string_view kNullOrder = "null-order";
3946
constexpr std::string_view kOrderId = "order-id";
4047
constexpr std::string_view kFields = "fields";
4148

42-
// --- helper for safe JSON extraction ---
49+
// Keys of the top-level schema JSON object. kIdentifierFieldIds is declared
// but not referenced by the code visible in this file yet.
constexpr std::string_view kSchemaId = "schema-id";
50+
constexpr std::string_view kIdentifierFieldIds = "identifier-field-ids";
51+
52+
// Member names and "type" tag values used when (de)serializing types and
// their fields. kDoc, kInitialDefault and kWriteDefault are declared but not
// referenced by the code visible in this file yet.
constexpr std::string_view kType = "type";
53+
constexpr std::string_view kStruct = "struct";
54+
constexpr std::string_view kList = "list";
55+
constexpr std::string_view kMap = "map";
56+
constexpr std::string_view kElement = "element";
57+
constexpr std::string_view kKey = "key";
58+
constexpr std::string_view kValue = "value";
59+
constexpr std::string_view kDoc = "doc";
60+
constexpr std::string_view kName = "name";
61+
constexpr std::string_view kId = "id";
62+
constexpr std::string_view kInitialDefault = "initial-default";
63+
constexpr std::string_view kWriteDefault = "write-default";
64+
constexpr std::string_view kElementId = "element-id";
65+
constexpr std::string_view kKeyId = "key-id";
66+
constexpr std::string_view kValueId = "value-id";
67+
constexpr std::string_view kRequired = "required";
68+
constexpr std::string_view kElementRequired = "element-required";
69+
constexpr std::string_view kValueRequired = "value-required";
70+
4371
// Extracts the member named `key` from `json` and converts it to `T`.
//
// Returns the converted value on success. Returns a kJsonParseError when
// `json` has no such member, or when looking it up / converting it throws.
// The error message includes the dumped JSON and, for conversion failures,
// the text of the underlying exception.
template <typename T>
expected<T, Error> GetJsonValue(const nlohmann::json& json, std::string_view key) {
  if (!json.contains(key)) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Missing '{}' in {}", key, json.dump()),
    });
  }
  try {
    return json.at(key).get<T>();
  } catch (const std::exception& ex) {
    // Keep the reason reported by the JSON library (e.g. a type mismatch);
    // without it the caller only learns that "something" failed.
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Failed to parse key '{}' in {}: {}", key, json.dump(),
                               ex.what()),
    });
  }
}
5788

58-
#define TRY_ASSIGN(json_value, expr) \
59-
auto _tmp_##json_value = (expr); \
60-
if (!_tmp_##json_value) return unexpected(_tmp_##json_value.error()); \
61-
auto json_value = std::move(_tmp_##json_value.value());
6289
} // namespace
6390

6491
nlohmann::json ToJson(const SortField& sort_field) {
@@ -84,29 +111,265 @@ nlohmann::json ToJson(const SortOrder& sort_order) {
84111

85112
// Builds a SortField from its JSON object form.
//
// The source id, transform, direction and null order members are read in that
// order; the first failure (missing member, bad conversion, or unrecognized
// name) is returned as the error.
expected<std::unique_ptr<SortField>, Error> SortFieldFromJson(
    const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto source_id, GetJsonValue<int32_t>(json, kSourceId));

  // Each textual member is fetched first and then handed to its parser.
  ICEBERG_ASSIGN_OR_RAISE(auto transform_name,
                          GetJsonValue<std::string>(json, kTransform));
  ICEBERG_ASSIGN_OR_RAISE(auto transform,
                          TransformFunctionFromString(std::move(transform_name)));

  ICEBERG_ASSIGN_OR_RAISE(auto direction_name,
                          GetJsonValue<std::string>(json, kDirection));
  ICEBERG_ASSIGN_OR_RAISE(auto direction,
                          SortDirectionFromString(std::move(direction_name)));

  ICEBERG_ASSIGN_OR_RAISE(auto null_order_name,
                          GetJsonValue<std::string>(json, kNullOrder));
  ICEBERG_ASSIGN_OR_RAISE(auto null_order,
                          NullOrderFromString(std::move(null_order_name)));

  return std::make_unique<SortField>(source_id, std::move(transform), direction,
                                     null_order);
}
98127

99128
// Builds a SortOrder from its JSON object form.
//
// Reads the order id and the list of sort fields. Returns a kJsonParseError
// when either member is missing, when the fields member is not a JSON array,
// or when any element fails to parse as a sort field.
expected<std::unique_ptr<SortOrder>, Error> SortOrderFromJson(
    const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue<int32_t>(json, kOrderId));
  ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue<nlohmann::json>(json, kFields));

  // Iterating a non-array would not fail on its own: null yields no elements
  // and an object yields its values, so reject those shapes explicitly.
  if (!fields.is_array()) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("'{}' must be an array in {}", kFields, json.dump()),
    });
  }

  std::vector<SortField> sort_fields;
  sort_fields.reserve(fields.size());
  for (const auto& field_json : fields) {
    ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json));
    sort_fields.push_back(std::move(*sort_field));
  }
  return std::make_unique<SortOrder>(order_id, std::move(sort_fields));
}
111140

141+
// Serializes a schema field as a JSON object holding its id, name, required
// flag (the negation of optional()) and its type.
nlohmann::json FieldToJson(const SchemaField& field) {
  const bool is_required = !field.optional();

  nlohmann::json result;
  result[kId] = field.field_id();
  result[kName] = field.name();
  result[kRequired] = is_required;
  result[kType] = TypeToJson(*field.type());
  return result;
}
149+
150+
nlohmann::json TypeToJson(const Type& type) {
151+
switch (type.type_id()) {
152+
case TypeId::kStruct: {
153+
const auto& struct_type = static_cast<const StructType&>(type);
154+
nlohmann::json json;
155+
json[kType] = kStruct;
156+
nlohmann::json fields_json = nlohmann::json::array();
157+
for (const auto& field : struct_type.fields()) {
158+
fields_json.push_back(FieldToJson(field));
159+
// TODO(gangwu): add default values
160+
}
161+
json[kFields] = fields_json;
162+
return json;
163+
}
164+
case TypeId::kList: {
165+
const auto& list_type = static_cast<const ListType&>(type);
166+
nlohmann::json json;
167+
json[kType] = kList;
168+
169+
const auto& element_field = list_type.fields().front();
170+
json[kElementId] = element_field.field_id();
171+
json[kElementRequired] = !element_field.optional();
172+
json[kElement] = TypeToJson(*element_field.type());
173+
return json;
174+
}
175+
case TypeId::kMap: {
176+
const auto& map_type = static_cast<const MapType&>(type);
177+
nlohmann::json json;
178+
json[std::string(kType)] = kMap;
179+
180+
const auto& key_field = map_type.key();
181+
json[kKeyId] = key_field.field_id();
182+
json[kKey] = TypeToJson(*key_field.type());
183+
184+
const auto& value_field = map_type.value();
185+
json[kValueId] = value_field.field_id();
186+
json[kValueRequired] = !value_field.optional();
187+
json[kValue] = TypeToJson(*value_field.type());
188+
return json;
189+
}
190+
case TypeId::kBoolean:
191+
return "boolean";
192+
case TypeId::kInt:
193+
return "int";
194+
case TypeId::kLong:
195+
return "long";
196+
case TypeId::kFloat:
197+
return "float";
198+
case TypeId::kDouble:
199+
return "double";
200+
case TypeId::kDecimal: {
201+
const auto& decimal_type = static_cast<const DecimalType&>(type);
202+
return std::format("decimal({},{})", decimal_type.precision(),
203+
decimal_type.scale());
204+
}
205+
case TypeId::kDate:
206+
return "date";
207+
case TypeId::kTime:
208+
return "time";
209+
case TypeId::kTimestamp:
210+
return "timestamp";
211+
case TypeId::kTimestampTz:
212+
return "timestamptz";
213+
case TypeId::kString:
214+
return "string";
215+
case TypeId::kBinary:
216+
return "binary";
217+
case TypeId::kFixed: {
218+
const auto& fixed_type = static_cast<const FixedType&>(type);
219+
return std::format("fixed[{}]", fixed_type.length());
220+
}
221+
case TypeId::kUuid:
222+
return "uuid";
223+
}
224+
}
225+
226+
// Serializes a schema: the JSON produced by TypeToJson for the schema viewed
// as a Type, plus the schema id.
nlohmann::json SchemaToJson(const Schema& schema) {
  const auto& schema_as_type = static_cast<const Type&>(schema);
  nlohmann::json result = TypeToJson(schema_as_type);
  result[kSchemaId] = schema.schema_id();
  // TODO(gangwu): add identifier-field-ids.
  return result;
}
232+
233+
namespace {
234+
235+
// Builds a StructType from a JSON object by parsing every entry of its fields
// member with FieldFromJson.
//
// Returns a kJsonParseError when the fields member is missing or is not a
// JSON array, and propagates the first error from parsing a field.
expected<std::unique_ptr<Type>, Error> StructTypeFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto json_fields, GetJsonValue<nlohmann::json>(json, kFields));

  // Iterating a non-array would not fail on its own: null yields no elements
  // and an object yields its values, so reject those shapes explicitly.
  if (!json_fields.is_array()) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("'{}' must be an array in {}", kFields, json.dump()),
    });
  }

  std::vector<SchemaField> fields;
  fields.reserve(json_fields.size());
  for (const auto& field_json : json_fields) {
    ICEBERG_ASSIGN_OR_RAISE(auto field, FieldFromJson(field_json));
    fields.emplace_back(std::move(*field));
  }

  return std::make_unique<StructType>(std::move(fields));
}
246+
247+
// Builds a ListType from a JSON object.
//
// Reads the element type, element id and element-required flag (in that
// order) and returns the first error encountered, including a kJsonParseError
// when any of the three members is missing.
expected<std::unique_ptr<Type>, Error> ListTypeFromJson(const nlohmann::json& json) {
  // Go through GetJsonValue rather than json[kElement]: operator[] on a const
  // json must not be used with a key that is absent, whereas GetJsonValue
  // reports the missing member as an error.
  ICEBERG_ASSIGN_OR_RAISE(
      auto element_type,
      GetJsonValue<nlohmann::json>(json, kElement).and_then(TypeFromJson));
  ICEBERG_ASSIGN_OR_RAISE(auto element_id, GetJsonValue<int32_t>(json, kElementId));
  ICEBERG_ASSIGN_OR_RAISE(auto element_required,
                          GetJsonValue<bool>(json, kElementRequired));

  // SchemaField takes an "optional" flag, the inverse of the JSON value.
  return std::make_unique<ListType>(
      SchemaField(element_id, std::string(ListType::kElementName),
                  std::move(element_type), !element_required));
}
257+
258+
// Builds a MapType from a JSON object.
//
// Members are processed in this order: key type, value type, key id, value
// id, value-required flag. The first failure is returned as the error. The
// key field is always created as non-optional.
expected<std::unique_ptr<Type>, Error> MapTypeFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto key_json, GetJsonValue<nlohmann::json>(json, kKey));
  ICEBERG_ASSIGN_OR_RAISE(auto key_type, TypeFromJson(key_json));

  ICEBERG_ASSIGN_OR_RAISE(auto value_json, GetJsonValue<nlohmann::json>(json, kValue));
  ICEBERG_ASSIGN_OR_RAISE(auto value_type, TypeFromJson(value_json));

  ICEBERG_ASSIGN_OR_RAISE(auto key_id, GetJsonValue<int32_t>(json, kKeyId));
  ICEBERG_ASSIGN_OR_RAISE(auto value_id, GetJsonValue<int32_t>(json, kValueId));
  ICEBERG_ASSIGN_OR_RAISE(auto value_required, GetJsonValue<bool>(json, kValueRequired));

  // SchemaField takes an "optional" flag, the inverse of the JSON value.
  const bool value_optional = !value_required;

  SchemaField key_field(key_id, std::string(MapType::kKeyName), std::move(key_type),
                        /*optional=*/false);
  SchemaField value_field(value_id, std::string(MapType::kValueName),
                          std::move(value_type), value_optional);
  return std::make_unique<MapType>(std::move(key_field), std::move(value_field));
}
274+
275+
} // namespace
276+
277+
namespace {

// Converts a digit-only regex capture into an int32_t type parameter.
//
// std::stoi throws std::out_of_range when the digits do not fit in an int;
// that is reported as a kJsonParseError naming the offending type string
// instead of letting the exception escape.
expected<int32_t, Error> ParseTypeParameter(const std::string& digits,
                                            std::string_view type_str) {
  try {
    return std::stoi(digits);
  } catch (const std::exception&) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Type parameter out of range: {}", type_str),
    });
  }
}

}  // namespace

// Parses an Iceberg type from JSON.
//
// A JSON string is treated as a primitive type name ("int", "fixed[16]",
// "decimal(10,2)", ...). Any other value is expected to be an object whose
// "type" member is "struct", "list" or "map". Unknown names, malformed or
// out-of-range fixed/decimal parameters, and a missing "type" member are all
// returned as kJsonParseError.
expected<std::unique_ptr<Type>, Error> TypeFromJson(const nlohmann::json& json) {
  if (json.is_string()) {
    std::string type_str = json.get<std::string>();
    if (type_str == "boolean") {
      return std::make_unique<BooleanType>();
    } else if (type_str == "int") {
      return std::make_unique<IntType>();
    } else if (type_str == "long") {
      return std::make_unique<LongType>();
    } else if (type_str == "float") {
      return std::make_unique<FloatType>();
    } else if (type_str == "double") {
      return std::make_unique<DoubleType>();
    } else if (type_str == "date") {
      return std::make_unique<DateType>();
    } else if (type_str == "time") {
      return std::make_unique<TimeType>();
    } else if (type_str == "timestamp") {
      return std::make_unique<TimestampType>();
    } else if (type_str == "timestamptz") {
      return std::make_unique<TimestampTzType>();
    } else if (type_str == "string") {
      return std::make_unique<StringType>();
    } else if (type_str == "binary") {
      return std::make_unique<BinaryType>();
    } else if (type_str == "uuid") {
      return std::make_unique<UuidType>();
    } else if (type_str.starts_with("fixed")) {
      // Compiled once; building a std::regex on every call is wasted work.
      static const std::regex kFixedPattern(R"(fixed\[\s*(\d+)\s*\])");
      std::smatch match;
      if (std::regex_match(type_str, match, kFixedPattern)) {
        ICEBERG_ASSIGN_OR_RAISE(auto length,
                                ParseTypeParameter(match[1].str(), type_str));
        return std::make_unique<FixedType>(length);
      }
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Invalid fixed type: {}", type_str),
      });
    } else if (type_str.starts_with("decimal")) {
      static const std::regex kDecimalPattern(R"(decimal\(\s*(\d+)\s*,\s*(\d+)\s*\))");
      std::smatch match;
      if (std::regex_match(type_str, match, kDecimalPattern)) {
        ICEBERG_ASSIGN_OR_RAISE(auto precision,
                                ParseTypeParameter(match[1].str(), type_str));
        ICEBERG_ASSIGN_OR_RAISE(auto scale,
                                ParseTypeParameter(match[2].str(), type_str));
        return std::make_unique<DecimalType>(precision, scale);
      }
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Invalid decimal type: {}", type_str),
      });
    } else {
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Unknown primitive type: {}", type_str),
      });
    }
  }

  // For complex types like struct, list, and map
  ICEBERG_ASSIGN_OR_RAISE(auto type_str, GetJsonValue<std::string>(json, kType));
  if (type_str == kStruct) {
    return StructTypeFromJson(json);
  } else if (type_str == kList) {
    return ListTypeFromJson(json);
  } else if (type_str == kMap) {
    return MapTypeFromJson(json);
  } else {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Unknown complex type: {}", type_str),
    });
  }
}
348+
349+
// Builds a SchemaField from its JSON object form.
//
// Members are processed in this order: type, id, name, required flag. The
// first failure is returned as the error.
expected<std::unique_ptr<SchemaField>, Error> FieldFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto type_json, GetJsonValue<nlohmann::json>(json, kType));
  ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(type_json));

  ICEBERG_ASSIGN_OR_RAISE(auto field_id, GetJsonValue<int32_t>(json, kId));
  ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue<std::string>(json, kName));
  ICEBERG_ASSIGN_OR_RAISE(auto required, GetJsonValue<bool>(json, kRequired));

  // SchemaField takes an "optional" flag, the inverse of the JSON value.
  const bool optional = !required;
  return std::make_unique<SchemaField>(field_id, std::move(name), std::move(type),
                                       optional);
}
359+
360+
// Builds a Schema from its JSON object form.
//
// Reads the schema id, parses the same JSON value as a type, and requires the
// parsed type to be a struct before handing it to FromStructType.
expected<std::unique_ptr<Schema>, Error> SchemaFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValue<int32_t>(json, kSchemaId));
  ICEBERG_ASSIGN_OR_RAISE(auto type, TypeFromJson(json));

  const bool is_struct = type->type_id() == TypeId::kStruct;
  if (!is_struct) [[unlikely]] {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Schema must be a struct type, but got {}", json.dump()),
    });
  }

  // The type id check above is what justifies the downcast.
  return FromStructType(std::move(static_cast<StructType&>(*type)), schema_id);
}
374+
112375
} // namespace iceberg

0 commit comments

Comments
 (0)