Skip to content

Commit af99db1

Browse files
committed
Add _partition as well
1 parent 5feaa96 commit af99db1

File tree

1 file changed

+144
-3
lines changed

1 file changed

+144
-3
lines changed

crates/iceberg/src/metadata_columns.rs

Lines changed: 144 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ pub const RESERVED_FIELD_ID_DELETED: i32 = i32::MAX - 3;
4141
/// Reserved field ID for the spec ID (_spec_id) column per Iceberg spec
4242
pub const RESERVED_FIELD_ID_SPEC_ID: i32 = i32::MAX - 4;
4343

44+
/// Reserved field ID for the partition (_partition) column per Iceberg spec
45+
pub const RESERVED_FIELD_ID_PARTITION: i32 = i32::MAX - 5;
46+
4447
/// Reserved field ID for the file path in position delete files
4548
pub const RESERVED_FIELD_ID_DELETE_FILE_PATH: i32 = i32::MAX - 101;
4649

@@ -74,6 +77,9 @@ pub const RESERVED_COL_NAME_DELETED: &str = "_deleted";
7477
/// Reserved column name for the spec ID metadata column
7578
pub const RESERVED_COL_NAME_SPEC_ID: &str = "_spec_id";
7679

80+
/// Reserved column name for the partition metadata column
81+
pub const RESERVED_COL_NAME_PARTITION: &str = "_partition";
82+
7783
/// Reserved column name for the file path in position delete files
7884
pub const RESERVED_COL_NAME_DELETE_FILE_PATH: &str = "file_path";
7985

@@ -328,8 +334,57 @@ pub fn last_updated_sequence_number_field() -> &'static NestedFieldRef {
328334
&LAST_UPDATED_SEQUENCE_NUMBER_FIELD
329335
}
330336

337+
/// Creates the Iceberg field definition for the _partition metadata column.
338+
///
339+
/// The _partition field is a struct whose fields depend on the partition spec.
340+
/// This function creates the field dynamically with the provided partition fields.
341+
///
342+
/// # Arguments
343+
/// * `partition_fields` - The fields that make up the partition struct
344+
///
345+
/// # Returns
346+
/// A new _partition field definition as an Iceberg NestedField
347+
///
348+
/// # Example
349+
/// ```
350+
/// use std::sync::Arc;
351+
///
352+
/// use iceberg::metadata_columns::partition_field;
353+
/// use iceberg::spec::{NestedField, PrimitiveType, Type};
354+
///
355+
/// let fields = vec![
356+
/// Arc::new(NestedField::required(
357+
/// 1,
358+
/// "year",
359+
/// Type::Primitive(PrimitiveType::Int),
360+
/// )),
361+
/// Arc::new(NestedField::required(
362+
/// 2,
363+
/// "month",
364+
/// Type::Primitive(PrimitiveType::Int),
365+
/// )),
366+
/// ];
367+
/// let partition_field = partition_field(fields);
368+
/// ```
369+
pub fn partition_field(partition_fields: Vec<NestedFieldRef>) -> NestedFieldRef {
370+
use crate::spec::StructType;
371+
372+
Arc::new(
373+
NestedField::required(
374+
RESERVED_FIELD_ID_PARTITION,
375+
RESERVED_COL_NAME_PARTITION,
376+
Type::Struct(StructType::new(partition_fields)),
377+
)
378+
.with_doc("Partition to which a row belongs"),
379+
)
380+
}
381+
331382
/// Returns the Iceberg field definition for a metadata field ID.
332383
///
384+
/// Note: This function does not support `_partition` (field ID `i32::MAX - 5`) because
385+
/// it's a struct field that requires dynamic partition fields. Use `partition_field()`
386+
/// instead to create the `_partition` field with the appropriate partition fields.
387+
///
333388
/// # Arguments
334389
/// * `field_id` - The metadata field ID
335390
///
@@ -341,15 +396,17 @@ pub fn get_metadata_field(field_id: i32) -> Result<&'static NestedFieldRef> {
341396
RESERVED_FIELD_ID_POS => Ok(pos_field()),
342397
RESERVED_FIELD_ID_DELETED => Ok(deleted_field()),
343398
RESERVED_FIELD_ID_SPEC_ID => Ok(spec_id_field()),
399+
RESERVED_FIELD_ID_PARTITION => Err(Error::new(
400+
ErrorKind::Unexpected,
401+
"The _partition field must be created using partition_field() with appropriate partition fields",
402+
)),
344403
RESERVED_FIELD_ID_DELETE_FILE_PATH => Ok(delete_file_path_field()),
345404
RESERVED_FIELD_ID_DELETE_FILE_POS => Ok(delete_file_pos_field()),
346405
RESERVED_FIELD_ID_CHANGE_TYPE => Ok(change_type_field()),
347406
RESERVED_FIELD_ID_CHANGE_ORDINAL => Ok(change_ordinal_field()),
348407
RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID => Ok(commit_snapshot_id_field()),
349408
RESERVED_FIELD_ID_ROW_ID => Ok(row_id_field()),
350-
RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER => {
351-
Ok(last_updated_sequence_number_field())
352-
}
409+
RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER => Ok(last_updated_sequence_number_field()),
353410
_ if is_metadata_field(field_id) => {
354411
// Future metadata fields can be added here
355412
Err(Error::new(
@@ -379,6 +436,7 @@ pub fn get_metadata_field_id(column_name: &str) -> Result<i32> {
379436
RESERVED_COL_NAME_POS => Ok(RESERVED_FIELD_ID_POS),
380437
RESERVED_COL_NAME_DELETED => Ok(RESERVED_FIELD_ID_DELETED),
381438
RESERVED_COL_NAME_SPEC_ID => Ok(RESERVED_FIELD_ID_SPEC_ID),
439+
RESERVED_COL_NAME_PARTITION => Ok(RESERVED_FIELD_ID_PARTITION),
382440
RESERVED_COL_NAME_DELETE_FILE_PATH => Ok(RESERVED_FIELD_ID_DELETE_FILE_PATH),
383441
RESERVED_COL_NAME_DELETE_FILE_POS => Ok(RESERVED_FIELD_ID_DELETE_FILE_POS),
384442
RESERVED_COL_NAME_CHANGE_TYPE => Ok(RESERVED_FIELD_ID_CHANGE_TYPE),
@@ -409,6 +467,7 @@ pub fn is_metadata_field(field_id: i32) -> bool {
409467
| RESERVED_FIELD_ID_POS
410468
| RESERVED_FIELD_ID_DELETED
411469
| RESERVED_FIELD_ID_SPEC_ID
470+
| RESERVED_FIELD_ID_PARTITION
412471
| RESERVED_FIELD_ID_DELETE_FILE_PATH
413472
| RESERVED_FIELD_ID_DELETE_FILE_POS
414473
| RESERVED_FIELD_ID_CHANGE_TYPE
@@ -429,3 +488,85 @@ pub fn is_metadata_field(field_id: i32) -> bool {
429488
pub fn is_metadata_column_name(column_name: &str) -> bool {
430489
get_metadata_field_id(column_name).is_ok()
431490
}
491+
492+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spec::PrimitiveType;

    /// Builds a required `int` member field for use inside a partition struct.
    fn int_member(id: i32, name: &str) -> NestedFieldRef {
        Arc::new(NestedField::required(
            id,
            name,
            Type::Primitive(PrimitiveType::Int),
        ))
    }

    #[test]
    fn test_partition_field_creation() {
        // Hypothetical year/month partition layout.
        let members = vec![int_member(1000, "year"), int_member(1001, "month")];

        let partition = partition_field(members);

        // The wrapper field carries the reserved identity and is required.
        assert_eq!(partition.id, RESERVED_FIELD_ID_PARTITION);
        assert_eq!(partition.name, RESERVED_COL_NAME_PARTITION);
        assert!(partition.required);

        // Its type must be a struct holding exactly the supplied members, in order.
        let Type::Struct(struct_type) = partition.field_type.as_ref() else {
            panic!("Expected struct type for _partition field");
        };
        let member_names: Vec<&str> = struct_type
            .fields()
            .iter()
            .map(|member| member.name.as_str())
            .collect();
        assert_eq!(member_names, ["year", "month"]);
    }

    #[test]
    fn test_partition_field_id_recognized() {
        assert!(is_metadata_field(RESERVED_FIELD_ID_PARTITION));
    }

    #[test]
    fn test_partition_field_name_recognized() {
        let resolved = get_metadata_field_id(RESERVED_COL_NAME_PARTITION).unwrap();
        assert_eq!(resolved, RESERVED_FIELD_ID_PARTITION);
    }

    #[test]
    fn test_get_metadata_field_returns_error_for_partition() {
        // `_partition` has no static definition, so the lookup must fail and
        // point the caller at `partition_field()`.
        let err = get_metadata_field(RESERVED_FIELD_ID_PARTITION)
            .expect_err("lookup of the _partition field ID should fail");
        assert!(err.to_string().contains("partition_field()"));
    }

    #[test]
    fn test_all_metadata_field_ids() {
        // Every metadata field other than `_partition` has a static definition.
        let static_field_ids = [
            RESERVED_FIELD_ID_FILE,
            RESERVED_FIELD_ID_POS,
            RESERVED_FIELD_ID_DELETED,
            RESERVED_FIELD_ID_SPEC_ID,
            RESERVED_FIELD_ID_DELETE_FILE_PATH,
            RESERVED_FIELD_ID_DELETE_FILE_POS,
            RESERVED_FIELD_ID_CHANGE_TYPE,
            RESERVED_FIELD_ID_CHANGE_ORDINAL,
            RESERVED_FIELD_ID_COMMIT_SNAPSHOT_ID,
            RESERVED_FIELD_ID_ROW_ID,
            RESERVED_FIELD_ID_LAST_UPDATED_SEQUENCE_NUMBER,
        ];

        for field_id in static_field_ids {
            assert!(
                get_metadata_field(field_id).is_ok(),
                "expected a static definition for metadata field id {field_id}"
            );
        }
    }
}

0 commit comments

Comments
 (0)