Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ set(ICEBERG_SOURCES
expression/expression.cc
file_reader.cc
json_internal.cc
metadata_columns.cc
name_mapping.cc
partition_field.cc
partition_spec.cc
Expand Down
78 changes: 78 additions & 0 deletions src/iceberg/metadata_columns.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/metadata_columns.h"

#include <unordered_map>

namespace iceberg {

namespace {

/// Lookup table type: metadata column name -> statically defined SchemaField.
using MetadataColumnMap = std::unordered_map<std::string_view, const SchemaField*>;

/// \brief Name-indexed table of the metadata columns that have a static SchemaField.
///
/// The table is built on the first call and the same instance is returned by every
/// later call. Keys are taken from each field's name(); values point at the static
/// members of MetadataColumns. `_partition` is not registered here.
const MetadataColumnMap& GetMetadataColumnMap() {
  static const MetadataColumnMap kColumnsByName = [] {
    const SchemaField* const registered_fields[] = {
        &MetadataColumns::kFilePath,
        &MetadataColumns::kRowPosition,
        &MetadataColumns::kIsDeleted,
        &MetadataColumns::kSpecId,
        &MetadataColumns::kRowId,
        &MetadataColumns::kLastUpdatedSequenceNumber,
    };

    MetadataColumnMap columns;
    for (const SchemaField* field : registered_fields) {
      columns.emplace(field->name(), field);
    }
    return columns;
  }();
  return kColumnsByName;
}

const std::set<int32_t>& GetMetadataFieldIdSet() {
static const std::set<int32_t> kMetadataFieldIds = {
MetadataColumns::kFilePath.field_id(),
MetadataColumns::kRowPosition.field_id(),
MetadataColumns::kIsDeleted.field_id(),
MetadataColumns::kSpecId.field_id(),
MetadataColumns::kPartitionColumnId,
MetadataColumns::kRowId.field_id(),
MetadataColumns::kLastUpdatedSequenceNumber.field_id()};
return kMetadataFieldIds;
}

} // namespace

/// Public accessor for the file-local set of metadata field IDs.
const std::set<int32_t>& MetadataColumns::MetadataFieldIds() {
  const std::set<int32_t>& field_ids = GetMetadataFieldIdSet();
  return field_ids;
}

/// Returns true when `name` is the partition column name or is registered in the
/// name-indexed metadata column table.
bool MetadataColumns::IsMetadataColumn(std::string_view name) {
  // The partition column has no entry in the table, so it is matched by name.
  if (name == kPartitionColumnName) {
    return true;
  }
  const auto& columns_by_name = GetMetadataColumnMap();
  return columns_by_name.count(name) != 0;
}

/// Returns true when `id` is one of the metadata field IDs.
bool MetadataColumns::IsMetadataColumn(int32_t id) {
  const auto& field_ids = GetMetadataFieldIdSet();
  return field_ids.count(id) != 0;
}

/// Looks up a metadata column by name in the name-indexed table.
///
/// Returns a pointer to the matching static SchemaField, or an InvalidArgument
/// error when the name is not registered. `_partition` is not in the table, so it
/// is reported as unknown here.
Result<const SchemaField*> MetadataColumns::MetadataColumn(std::string_view name) {
  const auto& columns_by_name = GetMetadataColumnMap();
  if (const auto entry = columns_by_name.find(name); entry != columns_by_name.end()) {
    return entry->second;
  }
  return InvalidArgument("Unknown metadata column: {}", name);
}

} // namespace iceberg
120 changes: 120 additions & 0 deletions src/iceberg/metadata_columns.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/metadata_columns.h
/// Metadata columns for reading Iceberg data files.

#include <limits>
#include <memory>
#include <set>
#include <string_view>

#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/type.h"

namespace iceberg {

/// \brief Constants and lookup helpers for metadata and reserved columns.
///
/// Columns with a fixed type are exposed as static SchemaField members. Columns
/// whose type cannot be fixed here (`_partition`, the delete-file `row` column)
/// are exposed only as ID / name / doc constants. Every ID is derived by
/// subtracting a small offset from the largest int32_t value.
struct ICEBERG_EXPORT MetadataColumns {
// Base value from which all IDs below are derived by subtraction.
constexpr static int32_t kInt32Max = std::numeric_limits<int32_t>::max();

// IDs kInt32Max - (1-100) are used for metadata columns

// Required string column `_file`.
inline static const SchemaField kFilePath =
SchemaField::MakeRequired(kInt32Max - 1, "_file", std::make_shared<StringType>(),
"Path of the file in which a row is stored");

// Required long column `_pos`.
inline static const SchemaField kRowPosition =
SchemaField::MakeRequired(kInt32Max - 2, "_pos", std::make_shared<LongType>(),
"Ordinal position of a row in the source data file");

// Required boolean column `_deleted`.
inline static const SchemaField kIsDeleted = SchemaField::MakeRequired(
kInt32Max - 3, "_deleted", std::make_shared<BooleanType>(),
"Whether the row has been deleted");

// Required int column `_spec_id`.
inline static const SchemaField kSpecId =
SchemaField::MakeRequired(kInt32Max - 4, "_spec_id", std::make_shared<IntType>(),
"Spec ID used to track the file containing a row");

// The partition column type depends on all specs in the table
// Only the ID, name and doc string are fixed here; no SchemaField is defined.
constexpr static int32_t kPartitionColumnId = kInt32Max - 5;
constexpr static std::string_view kPartitionColumnName = "_partition";
constexpr static std::string_view kPartitionColumnDoc =
"Partition to which a row belongs to";

// IDs only: no name, doc string or SchemaField is declared for these two here.
// NOTE(review): presumably the offset and byte size of a content blob — confirm
// against the table spec before relying on this.
constexpr static int32_t kContentOffsetColumnId = kInt32Max - 6;
constexpr static int32_t kContentSizeInBytesColumnId = kInt32Max - 7;

// IDs kInt32Max - (101-200) are used for reserved columns

// Required string column `file_path`.
inline static const SchemaField kDeleteFilePath = SchemaField::MakeRequired(
kInt32Max - 101, "file_path", std::make_shared<StringType>(),
"Path of a file in which a deleted row is stored");

// Required long column `pos`.
inline static const SchemaField kDeleteFilePos =
SchemaField::MakeRequired(kInt32Max - 102, "pos", std::make_shared<LongType>(),
"Ordinal position of a deleted row in the data file");

// The row column type depends on the table schema
// Only the ID, name and doc string are fixed here; no SchemaField is defined.
constexpr static int32_t kDeleteFileRowFieldId = kInt32Max - 103;
constexpr static std::string_view kDeleteFileRowFieldName = "row";
constexpr static std::string_view kDeleteFileRowDoc = "Deleted row values";

// Required string column `_change_type`.
inline static const SchemaField kChangeType = SchemaField::MakeRequired(
kInt32Max - 104, "_change_type", std::make_shared<StringType>(),
"Record type in changelog");

// Optional int column `_change_ordinal`.
inline static const SchemaField kChangeOrdinal = SchemaField::MakeOptional(
kInt32Max - 105, "_change_ordinal", std::make_shared<IntType>(),
"Change ordinal in changelog");

// Optional long column `_commit_snapshot_id`.
inline static const SchemaField kCommitSnapshotId =
SchemaField::MakeOptional(kInt32Max - 106, "_commit_snapshot_id",
std::make_shared<LongType>(), "Commit snapshot ID");

// Optional long column `_row_id`. Its ID lies in the reserved range, yet it is
// registered in the metadata column lookup tables in metadata_columns.cc.
inline static const SchemaField kRowId =
SchemaField::MakeOptional(kInt32Max - 107, "_row_id", std::make_shared<LongType>(),
"Implicit row ID that is automatically assigned");

// Optional long column `_last_updated_sequence_number`. Like kRowId, it is
// registered in the metadata column lookup tables in metadata_columns.cc.
inline static const SchemaField kLastUpdatedSequenceNumber = SchemaField::MakeOptional(
kInt32Max - 108, "_last_updated_sequence_number", std::make_shared<LongType>(),
"Sequence number when the row was last updated");

/// \brief Get the set of metadata field IDs.
///
/// \return The IDs of kFilePath, kRowPosition, kIsDeleted, kSpecId, kRowId and
/// kLastUpdatedSequenceNumber, plus kPartitionColumnId.
static const std::set<int32_t>& MetadataFieldIds();

/// \brief Check if a column name is a metadata column.
///
/// \param name The column name to test.
/// \return true for kPartitionColumnName and for the names of the fields listed
/// under MetadataFieldIds(); false otherwise.
static bool IsMetadataColumn(std::string_view name);

/// \brief Check if a column ID is a metadata column.
///
/// \param id The field ID to test.
/// \return true if the ID is contained in MetadataFieldIds().
static bool IsMetadataColumn(int32_t id);

/// \brief Get a metadata column by name.
///
/// \param name The name of the metadata column.
/// \return The metadata column, or an error if the name does not refer to a metadata
/// column. The returned pointer is guaranteed to be valid.
/// \note `_partition` has no static SchemaField, so this function returns an error
/// for it even though IsMetadataColumn(name) accepts it.
static Result<const SchemaField*> MetadataColumn(std::string_view name);

// TODO(gangwu): add functions to build partition columns from a table schema
};

} // namespace iceberg
Loading