Skip to content

Commit 39fbe30

Browse files
committed
feat: add json serde to name mapping
- add roundtrip JSON conversion to name mapping
- change MappedField to use std::shared_ptr for better usability
- use optional field_id in MappedField
1 parent 1c2530c commit 39fbe30

File tree

7 files changed

+164
-14
lines changed

7 files changed

+164
-14
lines changed

src/iceberg/json_internal.cc

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include <nlohmann/json.hpp>
3030

31+
#include "iceberg/name_mapping.h"
3132
#include "iceberg/partition_field.h"
3233
#include "iceberg/partition_spec.h"
3334
#include "iceberg/result.h"
@@ -71,9 +72,11 @@ constexpr std::string_view kKey = "key";
7172
constexpr std::string_view kValue = "value";
7273
constexpr std::string_view kDoc = "doc";
7374
constexpr std::string_view kName = "name";
75+
constexpr std::string_view kNames = "names";
7476
constexpr std::string_view kId = "id";
7577
constexpr std::string_view kInitialDefault = "initial-default";
7678
constexpr std::string_view kWriteDefault = "write-default";
79+
constexpr std::string_view kFieldId = "field-id";
7780
constexpr std::string_view kElementId = "element-id";
7881
constexpr std::string_view kKeyId = "key-id";
7982
constexpr std::string_view kValueId = "value-id";
@@ -82,7 +85,6 @@ constexpr std::string_view kElementRequired = "element-required";
8285
constexpr std::string_view kValueRequired = "value-required";
8386

8487
// Snapshot constants
85-
constexpr std::string_view kFieldId = "field-id";
8688
constexpr std::string_view kSpecId = "spec-id";
8789
constexpr std::string_view kSnapshotId = "snapshot-id";
8890
constexpr std::string_view kParentSnapshotId = "parent-snapshot-id";
@@ -1232,4 +1234,80 @@ Result<std::string> ToJsonString(const nlohmann::json& json) {
12321234
}
12331235
}
12341236

1237+
// Converts a single MappedField into a JSON object.
//
// "names" is always written as an array (possibly empty). "field-id" is written
// only when the field carries an ID, and "fields" only when a nested mapping is
// attached.
nlohmann::json ToJson(const MappedField& field) {
  nlohmann::json result;

  if (field.field_id) {
    result[kFieldId] = *field.field_id;
  }

  nlohmann::json name_list = nlohmann::json::array();
  for (const auto& entry : field.names) {
    name_list.push_back(entry);
  }
  result[kNames] = std::move(name_list);

  if (field.nested_mapping) {
    // Nested mappings are serialized through the MappedFields overload.
    result[kFields] = ToJson(*field.nested_mapping);
  }

  return result;
}
1254+
1255+
// Builds a MappedField from a JSON object.
//
// Every key is optional: a missing "field-id" leaves the ID empty, a missing
// "names" yields an empty name set, and a missing "fields" leaves the nested
// mapping null. A non-object input is rejected with a JSON parse error.
Result<MappedField> MappedFieldFromJson(const nlohmann::json& json) {
  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Cannot parse non-object mapping field: {}",
                          SafeDumpJson(json));
  }

  MappedField parsed;

  ICEBERG_ASSIGN_OR_RAISE(parsed.field_id,
                          GetJsonValueOptional<int32_t>(json, kFieldId));

  if (json.contains(kNames)) {
    ICEBERG_ASSIGN_OR_RAISE(std::vector<std::string> name_list,
                            GetJsonValue<std::vector<std::string>>(json, kNames));
    // Duplicates collapse because the destination is a set.
    parsed.names.insert(name_list.cbegin(), name_list.cend());
  }

  if (json.contains(kFields)) {
    ICEBERG_ASSIGN_OR_RAISE(auto nested_json,
                            GetJsonValue<nlohmann::json>(json, kFields));
    std::unique_ptr<MappedFields> nested;
    ICEBERG_ASSIGN_OR_RAISE(nested, MappedFieldsFromJson(nested_json));
    parsed.nested_mapping = std::move(nested);
  }

  return parsed;
}
1280+
1281+
// Serializes a MappedFields as a JSON array with one element per field, in the
// order returned by fields().
nlohmann::json ToJson(const MappedFields& mapped_fields) {
  nlohmann::json result = nlohmann::json::array();
  for (const auto& mapped_field : mapped_fields.fields()) {
    nlohmann::json element = ToJson(mapped_field);
    result.push_back(std::move(element));
  }
  return result;
}
1288+
1289+
// Parses a JSON array of mapped-field objects into a MappedFields.
//
// Returns a JSON parse error when `json` is not an array. Each element is
// parsed with MappedFieldFromJson, and an element failure is propagated by
// ICEBERG_ASSIGN_OR_RAISE. On success the parsed fields are handed to
// MappedFields::Make.
Result<std::unique_ptr<MappedFields>> MappedFieldsFromJson(const nlohmann::json& json) {
  if (!json.is_array()) [[unlikely]] {
    return JsonParseError("Cannot parse non-array mapping fields: {}",
                          SafeDumpJson(json));
  }

  std::vector<MappedField> fields;
  // The element count is known up front, so allocate once instead of growing
  // the vector while parsing.
  fields.reserve(json.size());
  for (const auto& field_json : json) {
    ICEBERG_ASSIGN_OR_RAISE(auto field, MappedFieldFromJson(field_json));
    fields.push_back(std::move(field));
  }

  return MappedFields::Make(std::move(fields));
}
1303+
1304+
// Serializes a NameMapping by serializing the MappedFields it exposes through
// AsMappedFields().
nlohmann::json ToJson(const NameMapping& name_mapping) {
  const auto& root_fields = name_mapping.AsMappedFields();
  return ToJson(root_fields);
}
1307+
1308+
// Parses `json` with MappedFieldsFromJson and wraps the result in a NameMapping
// via NameMapping::Make. A parse failure is propagated by
// ICEBERG_ASSIGN_OR_RAISE.
Result<std::unique_ptr<NameMapping>> NameMappingFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(std::unique_ptr<MappedFields> root_fields,
                          MappedFieldsFromJson(json));
  return NameMapping::Make(std::move(root_fields));
}
1312+
12351313
} // namespace iceberg

src/iceberg/json_internal.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,4 +256,40 @@ Result<nlohmann::json> FromJsonString(const std::string& json_string);
256256
/// \return A JSON string or an error if the serialization fails.
257257
Result<std::string> ToJsonString(const nlohmann::json& json);
258258

259+
/// \brief Serializes a `MappedField` object to JSON.
260+
///
261+
/// \param[in] field The `MappedField` object to be serialized.
262+
/// \return A JSON object with `names`, plus `field-id` and `fields` when they are set.
263+
nlohmann::json ToJson(const MappedField& field);
264+
265+
/// \brief Deserializes a JSON object into a `MappedField` object.
266+
///
267+
/// \param[in] json The JSON object representing a `MappedField`; all of its keys are optional.
268+
/// \return A `MappedField` object, or an error if `json` is not an object or a key fails to parse.
269+
Result<MappedField> MappedFieldFromJson(const nlohmann::json& json);
270+
271+
/// \brief Serializes a `MappedFields` object to JSON.
272+
///
273+
/// \param[in] mapped_fields The `MappedFields` object to be serialized.
274+
/// \return A JSON array containing one JSON object per `MappedField`.
275+
nlohmann::json ToJson(const MappedFields& mapped_fields);
276+
277+
/// \brief Deserializes a JSON array into a `MappedFields` object.
278+
///
279+
/// \param[in] json The JSON array representing a `MappedFields`.
280+
/// \return A pointer to the `MappedFields`, or an error if `json` is not an array or an element fails to parse.
281+
Result<std::unique_ptr<MappedFields>> MappedFieldsFromJson(const nlohmann::json& json);
282+
283+
/// \brief Serializes a `NameMapping` object to JSON.
284+
///
285+
/// \param[in] name_mapping The `NameMapping` object to be serialized.
286+
/// \return A JSON array representing the `NameMapping`.
287+
nlohmann::json ToJson(const NameMapping& name_mapping);
288+
289+
/// \brief Deserializes a JSON array into a `NameMapping` object.
290+
///
291+
/// \param[in] json The JSON array representing a `NameMapping`.
292+
/// \return A pointer to the `NameMapping`, or an error if the conversion fails.
293+
Result<std::unique_ptr<NameMapping>> NameMappingFromJson(const nlohmann::json& json);
294+
259295
} // namespace iceberg

src/iceberg/name_mapping.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ struct IndexByIdVisitor {
4545
std::unordered_map<int32_t, MappedFieldConstRef> field_by_id;
4646

4747
void Visit(const MappedField& field) {
48-
field_by_id.emplace(field.field_id, std::cref(field));
48+
if (field.field_id.has_value()) {
49+
field_by_id.emplace(field.field_id.value(), std::cref(field));
50+
}
4951
if (field.nested_mapping != nullptr) {
5052
Visit(*field.nested_mapping);
5153
}
@@ -124,7 +126,9 @@ const std::unordered_map<std::string_view, int32_t>& MappedFields::LazyNameToId(
124126
if (name_to_id_.empty() && !fields_.empty()) {
125127
for (const auto& field : fields_) {
126128
for (const auto& name : field.names) {
127-
name_to_id_.emplace(name, field.field_id);
129+
if (field.field_id.has_value()) {
130+
name_to_id_.emplace(name, field.field_id.value());
131+
}
128132
}
129133
}
130134
}
@@ -135,7 +139,9 @@ const std::unordered_map<int32_t, MappedFieldConstRef>& MappedFields::LazyIdToFi
135139
const {
136140
if (id_to_field_.empty() && !fields_.empty()) {
137141
for (const auto& field : fields_) {
138-
id_to_field_.emplace(field.field_id, std::cref(field));
142+
if (field.field_id.has_value()) {
143+
id_to_field_.emplace(field.field_id.value(), std::cref(field));
144+
}
139145
}
140146
}
141147
return id_to_field_;
@@ -243,7 +249,8 @@ bool operator==(const NameMapping& lhs, const NameMapping& rhs) {
243249

244250
std::string ToString(const MappedField& field) {
245251
return std::format(
246-
"({} -> {}{})", field.names, field.field_id,
252+
"({} -> {}{})", field.names,
253+
field.field_id.has_value() ? std::to_string(field.field_id.value()) : "null",
247254
field.nested_mapping ? std::format(", {}", ToString(*field.nested_mapping)) : "");
248255
}
249256

src/iceberg/name_mapping.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ struct ICEBERG_EXPORT MappedField {
4040
std::unordered_set<std::string> names;
4141
/// \brief An optional Iceberg field ID used when a field's name is present in `names`.
4242
/// Empty when the mapping entry does not carry a field ID.
43-
int32_t field_id;
43+
std::optional<int32_t> field_id;
4444
/// \brief An optional list of field mappings for child field of structs, maps, and
4545
/// lists.
46-
std::unique_ptr<class MappedFields> nested_mapping;
46+
std::shared_ptr<class MappedFields> nested_mapping;
4747

4848
friend bool operator==(const MappedField& lhs, const MappedField& rhs);
4949
};
@@ -76,12 +76,6 @@ class ICEBERG_EXPORT MappedFields {
7676

7777
friend bool operator==(const MappedFields& lhs, const MappedFields& rhs);
7878

79-
MappedFields(const MappedFields& other) = delete;
80-
MappedFields& operator=(const MappedFields& other) = delete;
81-
82-
MappedFields(MappedFields&& other) noexcept = default;
83-
MappedFields& operator=(MappedFields&& other) noexcept = default;
84-
8579
private:
8680
explicit MappedFields(std::vector<MappedField> fields);
8781

src/iceberg/type_fwd.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ struct SnapshotRef;
101101
struct StatisticsFile;
102102
struct TableMetadata;
103103

104+
struct MappedField;
105+
class MappedFields;
106+
class NameMapping;
107+
104108
enum class SnapshotRefType;
105109
enum class TransformType;
106110

test/json_internal_test.cc

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,19 @@
2121

2222
#include <memory>
2323

24+
#include <gmock/gmock.h>
2425
#include <gtest/gtest.h>
2526
#include <nlohmann/json.hpp>
2627

27-
#include "gmock/gmock.h"
28+
#include "iceberg/name_mapping.h"
2829
#include "iceberg/partition_spec.h"
2930
#include "iceberg/schema.h"
3031
#include "iceberg/snapshot.h"
3132
#include "iceberg/sort_field.h"
3233
#include "iceberg/sort_order.h"
3334
#include "iceberg/transform.h"
3435
#include "iceberg/util/formatter.h" // IWYU pragma: keep
36+
#include "iceberg/util/macros.h" // IWYU pragma: keep
3537
#include "iceberg/util/timepoint.h"
3638
#include "matchers.h"
3739

@@ -67,6 +69,11 @@ Result<std::unique_ptr<Snapshot>> FromJsonHelper(const nlohmann::json& json) {
6769
return SnapshotFromJson(json);
6870
}
6971

72+
template <>
73+
Result<std::unique_ptr<NameMapping>> FromJsonHelper(const nlohmann::json& json) {
74+
return NameMappingFromJson(json);
75+
}
76+
7077
// Helper function to reduce duplication in testing
7178
template <typename T>
7279
void TestJsonConversion(const T& obj, const nlohmann::json& expected_json) {
@@ -257,4 +264,27 @@ TEST(JsonInternalTest, SnapshotFromJsonSummaryWithNoOperation) {
257264
ASSERT_EQ(result.value()->operation(), DataOperation::kOverwrite);
258265
}
259266

267+
// Checks JSON conversion of a NameMapping with three top-level fields, the last
// of which ("location") carries a nested mapping with two child fields.
TEST(JsonInternalTest, NameMapping) {
268+
auto mapping = NameMapping::Make(
269+
{MappedField{.names = {"id"}, .field_id = 1},
270+
MappedField{.names = {"data"}, .field_id = 2},
271+
MappedField{.names = {"location"},
272+
.field_id = 3,
273+
.nested_mapping = MappedFields::Make(
274+
{MappedField{.names = {"latitude"}, .field_id = 4},
275+
MappedField{.names = {"longitude"}, .field_id = 5}})}});
276+
277+
// Expected form: a JSON array of field objects; nested fields appear under "fields".
nlohmann::json expected_json =
278+
R"([
279+
{"field-id": 1, "names": ["id"]},
280+
{"field-id": 2, "names": ["data"]},
281+
{"field-id": 3, "names": ["location"], "fields": [
282+
{"field-id": 4, "names": ["latitude"]},
283+
{"field-id": 5, "names": ["longitude"]}
284+
]}
285+
])"_json;
286+
287+
// NOTE(review): TestJsonConversion's body is not visible here; presumably it checks both directions — confirm.
TestJsonConversion(*mapping, expected_json);
288+
}
289+
260290
} // namespace iceberg

test/name_mapping_test.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <gmock/gmock.h>
2828
#include <gtest/gtest.h>
29+
2930
namespace iceberg {
3031

3132
class NameMappingTest : public ::testing::Test {

0 commit comments

Comments
 (0)