Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 79 additions & 1 deletion src/iceberg/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include <nlohmann/json.hpp>

#include "iceberg/name_mapping.h"
#include "iceberg/partition_field.h"
#include "iceberg/partition_spec.h"
#include "iceberg/result.h"
Expand Down Expand Up @@ -71,9 +72,11 @@ constexpr std::string_view kKey = "key";
constexpr std::string_view kValue = "value";
constexpr std::string_view kDoc = "doc";
constexpr std::string_view kName = "name";
constexpr std::string_view kNames = "names";
constexpr std::string_view kId = "id";
constexpr std::string_view kInitialDefault = "initial-default";
constexpr std::string_view kWriteDefault = "write-default";
constexpr std::string_view kFieldId = "field-id";
constexpr std::string_view kElementId = "element-id";
constexpr std::string_view kKeyId = "key-id";
constexpr std::string_view kValueId = "value-id";
Expand All @@ -82,7 +85,6 @@ constexpr std::string_view kElementRequired = "element-required";
constexpr std::string_view kValueRequired = "value-required";

// Snapshot constants
constexpr std::string_view kFieldId = "field-id";
constexpr std::string_view kSpecId = "spec-id";
constexpr std::string_view kSnapshotId = "snapshot-id";
constexpr std::string_view kParentSnapshotId = "parent-snapshot-id";
Expand Down Expand Up @@ -1232,4 +1234,80 @@ Result<std::string> ToJsonString(const nlohmann::json& json) {
}
}

// Serializes a single name-mapping entry.
//
// The "names" key is always written as an array (empty when the field has no
// names). "field-id" is written only when the field carries an ID, and
// "fields" only when a nested mapping is attached.
nlohmann::json ToJson(const MappedField& field) {
  nlohmann::json result;

  if (const auto& id = field.field_id; id.has_value()) {
    result[kFieldId] = *id;
  }

  // Start from an explicit array so that an empty name set serializes as `[]`.
  auto name_array = nlohmann::json::array();
  for (const auto& alias : field.names) {
    name_array.push_back(alias);
  }
  result[kNames] = std::move(name_array);

  if (field.nested_mapping) {
    result[kFields] = ToJson(*field.nested_mapping);
  }

  return result;
}

// Parses a single name-mapping entry from a JSON object.
//
// Keys are processed in the order "field-id", "names", "fields"; the first
// step that fails determines the error that is returned.
//   - "field-id" is read through GetJsonValueOptional (presumably an empty
//     optional when the key is missing -- confirm against that helper).
//   - "names" may be omitted, in which case the field gets an empty name set.
//   - "fields" may be omitted, in which case the nested mapping stays null.
Result<MappedField> MappedFieldFromJson(const nlohmann::json& json) {
  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Cannot parse non-object mapping field: {}",
                          SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(std::optional<int32_t> parsed_id,
                          GetJsonValueOptional<int32_t>(json, kFieldId));

  std::vector<std::string> name_list;
  if (json.contains(kNames)) {
    ICEBERG_ASSIGN_OR_RAISE(name_list,
                            GetJsonValue<std::vector<std::string>>(json, kNames));
  }

  std::unique_ptr<MappedFields> children;
  if (json.contains(kFields)) {
    ICEBERG_ASSIGN_OR_RAISE(auto children_json,
                            GetJsonValue<nlohmann::json>(json, kFields));
    ICEBERG_ASSIGN_OR_RAISE(children, MappedFieldsFromJson(children_json));
  }

  // The parsed list is converted into the set held by MappedField via its
  // iterator-range constructor.
  return MappedField{.names = {name_list.cbegin(), name_list.cend()},
                     .field_id = parsed_id,
                     .nested_mapping = std::move(children)};
}

// Serializes a list of mapped fields as a JSON array holding one element per
// field, in the order reported by `MappedFields::fields()`.
nlohmann::json ToJson(const MappedFields& mapped_fields) {
  auto result = nlohmann::json::array();
  for (const auto& entry : mapped_fields.fields()) {
    result.emplace_back(ToJson(entry));
  }
  return result;
}

// Parses a JSON array of mapping entries into a `MappedFields` instance.
//
// Returns a parse error when `json` is not an array, or propagates the error
// of the first element that `MappedFieldFromJson` rejects. Elements are parsed
// in array order.
Result<std::unique_ptr<MappedFields>> MappedFieldsFromJson(const nlohmann::json& json) {
  if (!json.is_array()) [[unlikely]] {
    return JsonParseError("Cannot parse non-array mapping fields: {}",
                          SafeDumpJson(json));
  }

  std::vector<MappedField> fields;
  // The element count is known up front; reserving avoids repeated
  // reallocation (and element moves) while the vector is filled.
  fields.reserve(json.size());
  for (const auto& field_json : json) {
    ICEBERG_ASSIGN_OR_RAISE(auto field, MappedFieldFromJson(field_json));
    fields.push_back(std::move(field));
  }

  return MappedFields::Make(std::move(fields));
}

// Serializes a name mapping by serializing the `MappedFields` it exposes
// through `AsMappedFields()`; the result is therefore a JSON array.
nlohmann::json ToJson(const NameMapping& name_mapping) {
  const auto& top_level_fields = name_mapping.AsMappedFields();
  return ToJson(top_level_fields);
}

// Parses a name mapping from its JSON array form.
//
// The array is first parsed by `MappedFieldsFromJson` (whose error is
// propagated unchanged); the resulting fields are then handed to
// `NameMapping::Make`.
Result<std::unique_ptr<NameMapping>> NameMappingFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto top_level_fields, MappedFieldsFromJson(json));
  return NameMapping::Make(std::move(top_level_fields));
}

} // namespace iceberg
36 changes: 36 additions & 0 deletions src/iceberg/json_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,4 +256,40 @@ Result<nlohmann::json> FromJsonString(const std::string& json_string);
/// \return A JSON string or an error if the serialization fails.
Result<std::string> ToJsonString(const nlohmann::json& json);

/// \brief Serializes a `MappedField` object to JSON.
///
/// The "names" key is always emitted as an array (possibly empty). The
/// "field-id" key is emitted only when the field has an ID, and the "fields"
/// key only when the field has a nested mapping.
///
/// \param[in] field The `MappedField` object to be serialized.
/// \return A JSON object representing the `MappedField`.
nlohmann::json ToJson(const MappedField& field);

/// \brief Deserializes a JSON object into a `MappedField` object.
///
/// A missing "names" key produces an empty name set and a missing "fields" key
/// leaves the nested mapping null. When "fields" is present it must be a JSON
/// array of mapping entries.
///
/// \param[in] json The JSON object representing a `MappedField`.
/// \return A `MappedField` object, or an error if `json` is not an object or any
/// of its keys cannot be converted.
Result<MappedField> MappedFieldFromJson(const nlohmann::json& json);

/// \brief Serializes a `MappedFields` object to JSON.
///
/// \param[in] mapped_fields The `MappedFields` object to be serialized.
/// \return A JSON array containing one JSON object per `MappedField`.
nlohmann::json ToJson(const MappedFields& mapped_fields);

/// \brief Deserializes a JSON array into a `MappedFields` object.
///
/// \param[in] json The JSON array representing a `MappedFields`; each element is
/// parsed as a `MappedField`.
/// \return A `MappedFields` object, or an error if `json` is not an array or any
/// element fails to parse.
Result<std::unique_ptr<MappedFields>> MappedFieldsFromJson(const nlohmann::json& json);

/// \brief Serializes a `NameMapping` object to JSON.
///
/// The mapping is serialized through its `MappedFields` view, so the result has
/// the same shape as the `MappedFields` serialization.
///
/// \param[in] name_mapping The `NameMapping` object to be serialized.
/// \return A JSON array representing the `NameMapping`.
nlohmann::json ToJson(const NameMapping& name_mapping);

/// \brief Deserializes a JSON array into a `NameMapping` object.
///
/// \param[in] json The JSON array representing a `NameMapping`; it is parsed the
/// same way as a `MappedFields` array.
/// \return A `NameMapping` object or an error if the conversion fails.
Result<std::unique_ptr<NameMapping>> NameMappingFromJson(const nlohmann::json& json);

} // namespace iceberg
15 changes: 11 additions & 4 deletions src/iceberg/name_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ struct IndexByIdVisitor {
std::unordered_map<int32_t, MappedFieldConstRef> field_by_id;

void Visit(const MappedField& field) {
field_by_id.emplace(field.field_id, std::cref(field));
if (field.field_id.has_value()) {
field_by_id.emplace(field.field_id.value(), std::cref(field));
}
if (field.nested_mapping != nullptr) {
Visit(*field.nested_mapping);
}
Expand Down Expand Up @@ -124,7 +126,9 @@ const std::unordered_map<std::string_view, int32_t>& MappedFields::LazyNameToId(
if (name_to_id_.empty() && !fields_.empty()) {
for (const auto& field : fields_) {
for (const auto& name : field.names) {
name_to_id_.emplace(name, field.field_id);
if (field.field_id.has_value()) {
name_to_id_.emplace(name, field.field_id.value());
}
}
}
}
Expand All @@ -135,7 +139,9 @@ const std::unordered_map<int32_t, MappedFieldConstRef>& MappedFields::LazyIdToFi
const {
if (id_to_field_.empty() && !fields_.empty()) {
for (const auto& field : fields_) {
id_to_field_.emplace(field.field_id, std::cref(field));
if (field.field_id.has_value()) {
id_to_field_.emplace(field.field_id.value(), std::cref(field));
}
}
}
return id_to_field_;
Expand Down Expand Up @@ -243,7 +249,8 @@ bool operator==(const NameMapping& lhs, const NameMapping& rhs) {

std::string ToString(const MappedField& field) {
return std::format(
"({} -> {}{})", field.names, field.field_id,
"({} -> {}{})", field.names,
field.field_id.has_value() ? std::to_string(field.field_id.value()) : "null",
field.nested_mapping ? std::format(", {}", ToString(*field.nested_mapping)) : "");
}

Expand Down
11 changes: 2 additions & 9 deletions src/iceberg/name_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@ struct ICEBERG_EXPORT MappedField {
/// \brief A required list of 0 or more names for a field.
std::unordered_set<std::string> names;
/// \brief An optional Iceberg field ID used when a field's name is present in `names`.
/// TODO(gangwu): check if we need to make it optional
int32_t field_id;
std::optional<int32_t> field_id;
/// \brief An optional list of field mappings for child field of structs, maps, and
/// lists.
std::unique_ptr<class MappedFields> nested_mapping;
std::shared_ptr<class MappedFields> nested_mapping;

friend bool operator==(const MappedField& lhs, const MappedField& rhs);
};
Expand Down Expand Up @@ -76,12 +75,6 @@ class ICEBERG_EXPORT MappedFields {

friend bool operator==(const MappedFields& lhs, const MappedFields& rhs);

MappedFields(const MappedFields& other) = delete;
MappedFields& operator=(const MappedFields& other) = delete;

MappedFields(MappedFields&& other) noexcept = default;
MappedFields& operator=(MappedFields&& other) noexcept = default;

private:
explicit MappedFields(std::vector<MappedField> fields);

Expand Down
4 changes: 4 additions & 0 deletions src/iceberg/type_fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ struct SnapshotRef;
struct StatisticsFile;
struct TableMetadata;

struct MappedField;
class MappedFields;
class NameMapping;

enum class SnapshotRefType;
enum class TransformType;

Expand Down
32 changes: 31 additions & 1 deletion test/json_internal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,19 @@

#include <memory>

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <nlohmann/json.hpp>

#include "gmock/gmock.h"
#include "iceberg/name_mapping.h"
#include "iceberg/partition_spec.h"
#include "iceberg/schema.h"
#include "iceberg/snapshot.h"
#include "iceberg/sort_field.h"
#include "iceberg/sort_order.h"
#include "iceberg/transform.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
#include "iceberg/util/macros.h" // IWYU pragma: keep
#include "iceberg/util/timepoint.h"
#include "matchers.h"

Expand Down Expand Up @@ -67,6 +69,11 @@ Result<std::unique_ptr<Snapshot>> FromJsonHelper(const nlohmann::json& json) {
return SnapshotFromJson(json);
}

// Specialization of FromJsonHelper for NameMapping: forwards to
// NameMappingFromJson.
template <>
Result<std::unique_ptr<NameMapping>> FromJsonHelper(const nlohmann::json& json) {
return NameMappingFromJson(json);
}

// Helper function to reduce duplication in testing
template <typename T>
void TestJsonConversion(const T& obj, const nlohmann::json& expected_json) {
Expand Down Expand Up @@ -257,4 +264,27 @@ TEST(JsonInternalTest, SnapshotFromJsonSummaryWithNoOperation) {
ASSERT_EQ(result.value()->operation(), DataOperation::kOverwrite);
}

// Checks the JSON conversion of a name mapping that mixes flat fields with one
// field carrying a nested mapping.
TEST(JsonInternalTest, NameMapping) {
  // Expected JSON form: a top-level array; the nested mapping of "location"
  // appears under its "fields" key.
  const nlohmann::json expected_json =
      R"([
{"field-id": 1, "names": ["id"]},
{"field-id": 2, "names": ["data"]},
{"field-id": 3, "names": ["location"], "fields": [
{"field-id": 4, "names": ["latitude"]},
{"field-id": 5, "names": ["longitude"]}
]}
])"_json;

  // In-memory mapping that corresponds to the JSON above.
  auto mapping = NameMapping::Make(
      {MappedField{.names = {"id"}, .field_id = 1},
       MappedField{.names = {"data"}, .field_id = 2},
       MappedField{.names = {"location"},
                   .field_id = 3,
                   .nested_mapping = MappedFields::Make(
                       {MappedField{.names = {"latitude"}, .field_id = 4},
                        MappedField{.names = {"longitude"}, .field_id = 5}})}});

  TestJsonConversion(*mapping, expected_json);
}

} // namespace iceberg
1 change: 1 addition & 0 deletions test/name_mapping_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <gmock/gmock.h>
#include <gtest/gtest.h>

namespace iceberg {

class NameMappingTest : public ::testing::Test {
Expand Down
Loading