Skip to content

Commit 060b45d

Browse files
committed
Add metadata column helper functions
1 parent 9e88edf commit 060b45d

File tree

4 files changed, with 49 additions and 19 deletions.

crates/iceberg/src/arrow/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator;
5454
use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator;
5555
use crate::expr::{BoundPredicate, BoundReference};
5656
use crate::io::{FileIO, FileMetadata, FileRead};
57-
use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_reserved_field};
57+
use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_metadata_field};
5858
use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream};
5959
use crate::spec::{Datum, NestedField, PrimitiveLiteral, PrimitiveType, Schema, Type};
6060
use crate::utils::available_parallelism;
@@ -225,7 +225,7 @@ impl ArrowReader {
225225
let project_field_ids_without_reserved: Vec<i32> = task
226226
.project_field_ids
227227
.iter()
228-
.filter(|&&id| !is_reserved_field(id))
228+
.filter(|&&id| !is_metadata_field(id))
229229
.copied()
230230
.collect();
231231
// so we must use position-based projection instead of field-ID matching

crates/iceberg/src/arrow/record_batch_transformer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use arrow_schema::{
3030
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
3131

3232
use crate::arrow::schema_to_arrow_schema;
33-
use crate::metadata_columns::get_reserved_field_name;
33+
use crate::metadata_columns::get_metadata_column_name;
3434
use crate::spec::{Literal, PrimitiveLiteral, Schema as IcebergSchema};
3535
use crate::{Error, ErrorKind, Result};
3636

@@ -217,7 +217,7 @@ impl RecordBatchTransformer {
217217
if let Some(constant_value) = constants_map.get(field_id) {
218218
// Create a field for the virtual column based on the constant type
219219
let arrow_type = Self::primitive_literal_to_arrow_type(constant_value)?;
220-
let field_name = get_reserved_field_name(*field_id)?;
220+
let field_name = get_metadata_column_name(*field_id)?;
221221
Ok(Arc::new(
222222
Field::new(field_name, arrow_type, false).with_metadata(HashMap::from([(
223223
PARQUET_FIELD_ID_META_KEY.to_string(),

crates/iceberg/src/metadata_columns.rs

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,60 @@ pub const RESERVED_FIELD_ID_FILE: i32 = 2147483646;
3030
/// Reserved column name for the file path metadata column
3131
pub const RESERVED_COL_NAME_FILE: &str = "_file";
3232

33-
/// Returns the field name for a reserved field ID.
33+
/// Returns the column name for a metadata field ID.
3434
///
3535
/// # Arguments
36-
/// * `field_id` - The reserved field ID
36+
/// * `field_id` - The metadata field ID
3737
///
3838
/// # Returns
39-
/// The name of the reserved field, or an error if the field ID is not recognized
40-
pub fn get_reserved_field_name(field_id: i32) -> Result<&'static str> {
39+
/// The name of the metadata column, or an error if the field ID is not recognized
40+
pub fn get_metadata_column_name(field_id: i32) -> Result<&'static str> {
4141
match field_id {
4242
RESERVED_FIELD_ID_FILE => Ok(RESERVED_COL_NAME_FILE),
4343
_ => Err(Error::new(
4444
ErrorKind::Unexpected,
45-
format!("Unknown reserved field ID: {field_id}"),
45+
format!("Unknown metadata field ID: {field_id}"),
4646
)),
4747
}
4848
}
4949

50-
/// Checks if a field ID is a reserved (virtual/metadata) field.
50+
/// Returns the field ID for a metadata column name.
51+
///
52+
/// # Arguments
53+
/// * `column_name` - The metadata column name
54+
///
55+
/// # Returns
56+
/// The field ID of the metadata column, or an error if the column name is not recognized
57+
pub fn get_metadata_field_id(column_name: &str) -> Result<i32> {
58+
match column_name {
59+
RESERVED_COL_NAME_FILE => Ok(RESERVED_FIELD_ID_FILE),
60+
_ => Err(Error::new(
61+
ErrorKind::Unexpected,
62+
format!("Unknown metadata column name: {column_name}"),
63+
)),
64+
}
65+
}
66+
67+
/// Checks if a field ID is a metadata field.
5168
///
5269
/// # Arguments
5370
/// * `field_id` - The field ID to check
5471
///
5572
/// # Returns
56-
/// `true` if the field ID is reserved, `false` otherwise
57-
pub fn is_reserved_field(field_id: i32) -> bool {
73+
/// `true` if the field ID is a metadata field, `false` otherwise
74+
pub fn is_metadata_field(field_id: i32) -> bool {
5875
field_id == RESERVED_FIELD_ID_FILE
59-
// Additional reserved fields can be checked here in the future
76+
// Additional metadata fields can be checked here in the future
77+
}
78+
79+
/// Checks if a column name is a metadata column.
80+
///
81+
/// # Arguments
82+
/// * `column_name` - The column name to check
83+
///
84+
/// # Returns
85+
/// `true` if the column name is a metadata column, `false` otherwise
86+
pub fn is_metadata_column_name(column_name: &str) -> bool {
87+
column_name == RESERVED_COL_NAME_FILE
88+
// Additional metadata column names can be checked here in the future
6089
}

crates/iceberg/src/scan/mod.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use crate::delete_file_index::DeleteFileIndex;
3636
use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator;
3737
use crate::expr::{Bind, BoundPredicate, Predicate};
3838
use crate::io::FileIO;
39-
use crate::metadata_columns::{RESERVED_COL_NAME_FILE, RESERVED_FIELD_ID_FILE};
39+
use crate::metadata_columns::{get_metadata_field_id, is_metadata_column_name};
4040
use crate::runtime::spawn;
4141
use crate::spec::{DataContentType, SnapshotRef};
4242
use crate::table::Table;
@@ -222,7 +222,7 @@ impl<'a> TableScanBuilder<'a> {
222222
if let Some(column_names) = self.column_names.as_ref() {
223223
for column_name in column_names {
224224
// Skip reserved columns that don't exist in the schema
225-
if column_name == RESERVED_COL_NAME_FILE {
225+
if is_metadata_column_name(column_name) {
226226
continue;
227227
}
228228
if schema.field_by_name(column_name).is_none() {
@@ -245,9 +245,9 @@ impl<'a> TableScanBuilder<'a> {
245245
});
246246

247247
for column_name in column_names.iter() {
248-
// Handle special reserved column "_file"
249-
if column_name == RESERVED_COL_NAME_FILE {
250-
field_ids.push(RESERVED_FIELD_ID_FILE);
248+
// Handle metadata columns (like "_file")
249+
if is_metadata_column_name(column_name) {
250+
field_ids.push(get_metadata_field_id(column_name)?);
251251
continue;
252252
}
253253

@@ -588,7 +588,8 @@ pub mod tests {
588588
use crate::arrow::ArrowReaderBuilder;
589589
use crate::expr::{BoundPredicate, Reference};
590590
use crate::io::{FileIO, OutputFile};
591-
use crate::scan::{FileScanTask, RESERVED_COL_NAME_FILE};
591+
use crate::metadata_columns::RESERVED_COL_NAME_FILE;
592+
use crate::scan::FileScanTask;
592593
use crate::spec::{
593594
DataContentType, DataFileBuilder, DataFileFormat, Datum, Literal, ManifestEntry,
594595
ManifestListWriter, ManifestStatus, ManifestWriterBuilder, NestedField, PartitionSpec,

0 commit comments

Comments
 (0)