Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ set(ICEBERG_SOURCES
expression/expression.cc
file_reader.cc
json_internal.cc
name_mapping.cc
partition_field.cc
partition_spec.cc
schema.cc
Expand All @@ -36,7 +37,6 @@ set(ICEBERG_SOURCES
transform.cc
transform_function.cc
type.cc
snapshot.cc
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a CPP guy, but why is snapshot.cc removed here? 🤔

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is duplicate at line 32 :)

util/murmurhash3_internal.cc
util/timepoint.cc)

Expand Down
267 changes: 267 additions & 0 deletions src/iceberg/name_mapping.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/name_mapping.h"

#include <format>
#include <sstream>

#include "iceberg/util/formatter_internal.h"

namespace iceberg {

namespace {

// Helper function to join a list of field names with a dot
std::string JoinByDot(std::span<const std::string> parts) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you can use join_view for this too

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried it but it seems that Clang does not yet implement this.

std::stringstream ss;
for (size_t i = 0; i < parts.size(); ++i) {
if (i > 0) {
ss << ".";
}
ss << parts[i];
}
return ss.str();
}

// Helper class to recursively index MappedField by field id
struct IndexByIdVisitor {
std::unordered_map<int32_t, MappedFieldConstRef> field_by_id;

void Visit(const MappedField& field) {
field_by_id.emplace(field.field_id, std::cref(field));
if (field.nested_mapping != nullptr) {
Visit(*field.nested_mapping);
}
}

void Visit(const MappedFields& fields) {
for (const auto& field : fields.fields()) {
Visit(field);
}
}

void Visit(const NameMapping& name_mapping) { Visit(name_mapping.AsMappedFields()); }
};

// Helper class to recursively index MappedField by field name
struct IndexByNameVisitor {
std::unordered_map<std::string, MappedFieldConstRef> field_by_name;

void Visit(const MappedField& field) {
for (const auto& name : field.names) {
field_by_name.emplace(name, std::cref(field));
}

if (field.nested_mapping != nullptr) {
IndexByNameVisitor nested_visitor;
nested_visitor.Visit(*field.nested_mapping);

for (const auto& [name, mapped_field] : nested_visitor.field_by_name) {
for (const auto& prefix : field.names) {
std::vector<std::string> parts = {prefix, name};
field_by_name.emplace(JoinByDot(parts), std::cref(mapped_field));
}
}
}
}

void Visit(const MappedFields& fields) {
for (const auto& field : fields.fields()) {
Visit(field);
}
}

void Visit(const NameMapping& name_mapping) { Visit(name_mapping.AsMappedFields()); }
};

} // namespace

MappedFields::MappedFields(std::vector<MappedField> fields)
: fields_(std::move(fields)) {}

std::unique_ptr<MappedFields> MappedFields::Make(std::vector<MappedField> fields) {
return std::unique_ptr<MappedFields>(new MappedFields(std::move(fields)));
}

std::optional<MappedFieldConstRef> MappedFields::Field(int32_t id) const {
const auto& id_to_field = LazyIdToField();
if (auto it = id_to_field.find(id); it != id_to_field.cend()) {
return it->second;
}
return std::nullopt;
}

std::optional<int32_t> MappedFields::Id(std::string_view name) const {
const auto& name_to_id = LazyNameToId();
if (auto it = name_to_id.find(name); it != name_to_id.cend()) {
return it->second;
}
return std::nullopt;
}

size_t MappedFields::Size() const { return fields_.size(); }

std::span<const MappedField> MappedFields::fields() const { return fields_; }

const std::unordered_map<std::string_view, int32_t>& MappedFields::LazyNameToId() const {
if (name_to_id_.empty() && !fields_.empty()) {
for (const auto& field : fields_) {
for (const auto& name : field.names) {
name_to_id_.emplace(name, field.field_id);
}
}
}
return name_to_id_;
}

const std::unordered_map<int32_t, MappedFieldConstRef>& MappedFields::LazyIdToField()
const {
if (id_to_field_.empty() && !fields_.empty()) {
for (const auto& field : fields_) {
id_to_field_.emplace(field.field_id, std::cref(field));
}
}
return id_to_field_;
}

NameMapping::NameMapping(std::unique_ptr<MappedFields> mapping)
: mapping_(std::move(mapping)) {}

std::optional<MappedFieldConstRef> NameMapping::Find(int32_t id) {
const auto& fields_by_id = LazyFieldsById();
if (auto iter = fields_by_id.find(id); iter != fields_by_id.cend()) {
return iter->second;
}
return std::nullopt;
}

std::optional<MappedFieldConstRef> NameMapping::Find(std::span<const std::string> names) {
if (names.empty()) {
return std::nullopt;
}
return Find(JoinByDot(names));
}

std::optional<MappedFieldConstRef> NameMapping::Find(const std::string& name) {
const auto& fields_by_name = LazyFieldsByName();
if (auto iter = fields_by_name.find(name); iter != fields_by_name.cend()) {
return iter->second;
}
return std::nullopt;
}

const MappedFields& NameMapping::AsMappedFields() const {
if (mapping_ == nullptr) {
const static std::unique_ptr<MappedFields> kEmptyFields = MappedFields::Make({});
return *kEmptyFields;
}
return *mapping_;
}

const std::unordered_map<int32_t, MappedFieldConstRef>& NameMapping::LazyFieldsById()
const {
if (fields_by_id_.empty()) {
IndexByIdVisitor visitor;
visitor.Visit(AsMappedFields());
fields_by_id_ = std::move(visitor.field_by_id);
}
return fields_by_id_;
}

const std::unordered_map<std::string, MappedFieldConstRef>&
NameMapping::LazyFieldsByName() const {
if (fields_by_name_.empty()) {
IndexByNameVisitor visitor;
visitor.Visit(AsMappedFields());
fields_by_name_ = std::move(visitor.field_by_name);
}
return fields_by_name_;
}

std::unique_ptr<NameMapping> NameMapping::MakeEmpty() {
return std::unique_ptr<NameMapping>(new NameMapping(MappedFields::Make({})));
}

std::unique_ptr<NameMapping> NameMapping::Make(std::unique_ptr<MappedFields> fields) {
return std::unique_ptr<NameMapping>(new NameMapping(std::move(fields)));
}

std::unique_ptr<NameMapping> NameMapping::Make(std::vector<MappedField> fields) {
return Make(MappedFields::Make(std::move(fields)));
}

bool operator==(const MappedField& lhs, const MappedField& rhs) {
if (lhs.field_id != rhs.field_id) {
return false;
}
if (lhs.names != rhs.names) {
return false;
}
if (lhs.nested_mapping == nullptr && rhs.nested_mapping == nullptr) {
return true;
}
if (lhs.nested_mapping == nullptr || rhs.nested_mapping == nullptr) {
return false;
}
return *lhs.nested_mapping == *rhs.nested_mapping;
}

bool operator==(const MappedFields& lhs, const MappedFields& rhs) {
if (lhs.Size() != rhs.Size()) {
return false;
}
auto lhs_fields = lhs.fields();
auto rhs_fields = rhs.fields();
for (size_t i = 0; i < lhs.Size(); ++i) {
if (lhs_fields[i] != rhs_fields[i]) {
return false;
}
}
return true;
}

bool operator==(const NameMapping& lhs, const NameMapping& rhs) {
return lhs.AsMappedFields() == rhs.AsMappedFields();
}

std::string ToString(const MappedField& field) {
return std::format(
"({} -> {}{})", field.names, field.field_id,
field.nested_mapping ? std::format(", {}", ToString(*field.nested_mapping)) : "");
}

std::string ToString(const MappedFields& fields) {
return std::format("{}", fields.fields());
}

std::string ToString(const NameMapping& name_mapping) {
const auto& fields = name_mapping.AsMappedFields();
if (fields.Size() == 0) {
return "[]";
}
std::string repr = "[\n";
for (const auto& field : fields.fields()) {
std::format_to(std::back_inserter(repr), " {}\n", ToString(field));
}
repr += "]";
return repr;
}

} // namespace iceberg
Loading
Loading