Skip to content

Commit eb65fdf

Browse files
authored
Merge branch 'main' into ctty/name-pos
2 parents 34315f0 + 299cfed commit eb65fdf

File tree

4 files changed

+321
-2
lines changed

4 files changed

+321
-2
lines changed

crates/iceberg/src/spec/manifest/_serde.rs

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,4 +443,154 @@ mod tests {
443443

444444
assert_eq!(actual_data_file[0].content, DataContentType::Data)
445445
}
446+
447+
#[test]
448+
fn test_manifest_entry_v1_to_v2_projection() {
449+
use crate::spec::manifest::_serde::{DataFileSerde, ManifestEntryV1};
450+
use crate::spec::{Literal, RawLiteral, Struct, StructType};
451+
452+
let partition = RawLiteral::try_from(
453+
Literal::Struct(Struct::empty()),
454+
&Type::Struct(StructType::new(vec![])),
455+
)
456+
.unwrap();
457+
458+
// Create a V1 manifest entry struct (lacks V2 sequence number fields)
459+
let v1_entry = ManifestEntryV1 {
460+
status: 1, // Added
461+
snapshot_id: 12345,
462+
data_file: DataFileSerde {
463+
content: 0, // DataFileSerde is shared between V1/V2
464+
file_path: "test/path.parquet".to_string(),
465+
file_format: "PARQUET".to_string(),
466+
partition,
467+
record_count: 100,
468+
file_size_in_bytes: 1024,
469+
block_size_in_bytes: Some(0), // V1 includes this field
470+
column_sizes: None,
471+
value_counts: None,
472+
null_value_counts: None,
473+
nan_value_counts: None,
474+
lower_bounds: None,
475+
upper_bounds: None,
476+
key_metadata: None,
477+
split_offsets: None,
478+
equality_ids: None, // Will be converted to empty vec
479+
sort_order_id: None,
480+
first_row_id: None,
481+
referenced_data_file: None,
482+
content_offset: None,
483+
content_size_in_bytes: None,
484+
},
485+
};
486+
487+
// Test the explicit V1→V2 conversion logic in ManifestEntryV1::try_into()
488+
let v2_entry = v1_entry
489+
.try_into(
490+
0, // partition_spec_id
491+
&StructType::new(vec![]),
492+
&schema(),
493+
)
494+
.unwrap();
495+
496+
// Verify that V1→V2 conversion adds the missing V2 sequence number fields
497+
assert_eq!(
498+
v2_entry.sequence_number,
499+
Some(0),
500+
"ManifestEntryV1::try_into() should set sequence_number to 0"
501+
);
502+
assert_eq!(
503+
v2_entry.file_sequence_number,
504+
Some(0),
505+
"ManifestEntryV1::try_into() should set file_sequence_number to 0"
506+
);
507+
assert_eq!(
508+
v2_entry.snapshot_id,
509+
Some(12345),
510+
"snapshot_id should be preserved during conversion"
511+
);
512+
513+
// Verify that DataFileSerde conversion applies V2 defaults
514+
assert_eq!(
515+
v2_entry.data_file.content,
516+
DataContentType::Data,
517+
"DataFileSerde should convert content 0 to DataContentType::Data"
518+
);
519+
assert_eq!(
520+
v2_entry.data_file.equality_ids,
521+
Vec::<i32>::new(),
522+
"DataFileSerde should convert None equality_ids to empty vec"
523+
);
524+
525+
// Verify other fields are preserved during conversion
526+
assert_eq!(v2_entry.data_file.file_path, "test/path.parquet");
527+
assert_eq!(v2_entry.data_file.record_count, 100);
528+
assert_eq!(v2_entry.data_file.file_size_in_bytes, 1024);
529+
}
530+
531+
#[test]
532+
fn test_data_file_serde_v1_field_defaults() {
533+
use crate::spec::manifest::_serde::DataFileSerde;
534+
use crate::spec::{Literal, RawLiteral, Struct, StructType};
535+
536+
let partition = RawLiteral::try_from(
537+
Literal::Struct(Struct::empty()),
538+
&Type::Struct(StructType::new(vec![])),
539+
)
540+
.unwrap();
541+
542+
// Create a DataFileSerde that simulates V1 deserialization behavior
543+
// (missing V2 fields would be None due to #[serde(default)])
544+
let v1_style_data_file = DataFileSerde {
545+
content: 0, // V1 doesn't have this field, defaults to 0 via #[serde(default)]
546+
file_path: "test/data.parquet".to_string(),
547+
file_format: "PARQUET".to_string(),
548+
partition,
549+
record_count: 500,
550+
file_size_in_bytes: 2048,
551+
block_size_in_bytes: Some(1024), // V1 includes this field, V2 skips it
552+
column_sizes: None,
553+
value_counts: None,
554+
null_value_counts: None,
555+
nan_value_counts: None,
556+
lower_bounds: None,
557+
upper_bounds: None,
558+
key_metadata: None,
559+
split_offsets: None,
560+
equality_ids: None, // V1 doesn't have this field, defaults to None via #[serde(default)]
561+
sort_order_id: None,
562+
first_row_id: None,
563+
referenced_data_file: None,
564+
content_offset: None,
565+
content_size_in_bytes: None,
566+
};
567+
568+
// Test the DataFileSerde::try_into() conversion that handles V1 field defaults
569+
let data_file = v1_style_data_file
570+
.try_into(
571+
0, // partition_spec_id
572+
&StructType::new(vec![]),
573+
&schema(),
574+
)
575+
.unwrap();
576+
577+
// Verify that DataFileSerde::try_into() applies correct defaults for missing V2 fields
578+
assert_eq!(
579+
data_file.content,
580+
DataContentType::Data,
581+
"content 0 should convert to DataContentType::Data"
582+
);
583+
assert_eq!(
584+
data_file.equality_ids,
585+
Vec::<i32>::new(),
586+
"None equality_ids should convert to empty vec via unwrap_or_default()"
587+
);
588+
589+
// Verify other fields are handled correctly during conversion
590+
assert_eq!(data_file.file_path, "test/data.parquet");
591+
assert_eq!(data_file.file_format, DataFileFormat::Parquet);
592+
assert_eq!(data_file.record_count, 500);
593+
assert_eq!(data_file.file_size_in_bytes, 2048);
594+
assert_eq!(data_file.partition_spec_id, 0);
595+
}
446596
}

crates/iceberg/src/spec/manifest/data_file.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,10 @@ pub fn read_data_files_from_avro<R: Read>(
337337

338338
/// Type of content stored by the data file: data, equality deletes, or
339339
/// position deletes (all v1 files are data files)
340-
#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]
340+
#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize, Default)]
341341
pub enum DataContentType {
342342
/// value: 0
343+
#[default]
343344
Data = 0,
344345
/// value: 1
345346
PositionDeletes = 1,
@@ -403,3 +404,17 @@ impl std::fmt::Display for DataFileFormat {
403404
}
404405
}
405406
}
407+
408+
#[cfg(test)]
409+
mod test {
410+
use crate::spec::DataContentType;
411+
#[test]
412+
fn test_data_content_type_default() {
413+
assert_eq!(DataContentType::default(), DataContentType::Data);
414+
}
415+
416+
#[test]
417+
fn test_data_content_type_default_value() {
418+
assert_eq!(DataContentType::default() as i32, 0);
419+
}
420+
}

crates/iceberg/src/spec/manifest_list.rs

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,9 +600,10 @@ impl ManifestFile {
600600
}
601601

602602
/// The type of files tracked by the manifest, either data or delete files; Data(0) for all v1 manifests
603-
#[derive(Debug, PartialEq, Clone, Copy, Eq, Hash)]
603+
#[derive(Debug, PartialEq, Clone, Copy, Eq, Hash, Default)]
604604
pub enum ManifestContentType {
605605
/// The manifest content is data.
606+
#[default]
606607
Data = 0,
607608
/// The manifest content is deletes.
608609
Deletes = 1,
@@ -1357,4 +1358,67 @@ mod test {
13571358
};
13581359
fields
13591360
}
1361+
1362+
#[test]
1363+
fn test_manifest_content_type_default() {
1364+
assert_eq!(ManifestContentType::default(), ManifestContentType::Data);
1365+
}
1366+
1367+
#[test]
1368+
fn test_manifest_content_type_default_value() {
1369+
assert_eq!(ManifestContentType::default() as i32, 0);
1370+
}
1371+
1372+
#[test]
1373+
fn test_manifest_file_v1_to_v2_projection() {
1374+
use crate::spec::manifest_list::_serde::ManifestFileV1;
1375+
1376+
// Create a V1 manifest file object (without V2 fields)
1377+
let v1_manifest = ManifestFileV1 {
1378+
manifest_path: "/test/manifest.avro".to_string(),
1379+
manifest_length: 5806,
1380+
partition_spec_id: 0,
1381+
added_snapshot_id: 1646658105718557341,
1382+
added_data_files_count: Some(3),
1383+
existing_data_files_count: Some(0),
1384+
deleted_data_files_count: Some(0),
1385+
added_rows_count: Some(3),
1386+
existing_rows_count: Some(0),
1387+
deleted_rows_count: Some(0),
1388+
partitions: None,
1389+
key_metadata: None,
1390+
};
1391+
1392+
// Convert V1 to V2 - this should apply defaults for missing V2 fields
1393+
let v2_manifest: ManifestFile = v1_manifest.try_into().unwrap();
1394+
1395+
// Verify V1→V2 projection defaults are applied correctly
1396+
assert_eq!(
1397+
v2_manifest.content,
1398+
ManifestContentType::Data,
1399+
"V1 manifest content should default to Data (0)"
1400+
);
1401+
assert_eq!(
1402+
v2_manifest.sequence_number, 0,
1403+
"V1 manifest sequence_number should default to 0"
1404+
);
1405+
assert_eq!(
1406+
v2_manifest.min_sequence_number, 0,
1407+
"V1 manifest min_sequence_number should default to 0"
1408+
);
1409+
1410+
// Verify other fields are preserved correctly
1411+
assert_eq!(v2_manifest.manifest_path, "/test/manifest.avro");
1412+
assert_eq!(v2_manifest.manifest_length, 5806);
1413+
assert_eq!(v2_manifest.partition_spec_id, 0);
1414+
assert_eq!(v2_manifest.added_snapshot_id, 1646658105718557341);
1415+
assert_eq!(v2_manifest.added_files_count, Some(3));
1416+
assert_eq!(v2_manifest.existing_files_count, Some(0));
1417+
assert_eq!(v2_manifest.deleted_files_count, Some(0));
1418+
assert_eq!(v2_manifest.added_rows_count, Some(3));
1419+
assert_eq!(v2_manifest.existing_rows_count, Some(0));
1420+
assert_eq!(v2_manifest.deleted_rows_count, Some(0));
1421+
assert_eq!(v2_manifest.partitions, None);
1422+
assert_eq!(v2_manifest.key_metadata, None);
1423+
}
13601424
}

crates/iceberg/src/spec/snapshot.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,4 +435,94 @@ mod tests {
435435
);
436436
assert_eq!("s3://b/wh/.../s1.avro".to_string(), *result.manifest_list());
437437
}
438+
439+
#[test]
440+
fn test_snapshot_v1_to_v2_projection() {
441+
use crate::spec::snapshot::_serde::SnapshotV1;
442+
443+
// Create a V1 snapshot (without sequence-number field)
444+
let v1_snapshot = SnapshotV1 {
445+
snapshot_id: 1234567890,
446+
parent_snapshot_id: Some(987654321),
447+
timestamp_ms: 1515100955770,
448+
manifest_list: Some("s3://bucket/manifest-list.avro".to_string()),
449+
manifests: None, // V1 can have either manifest_list or manifests, but not both
450+
summary: Some(Summary {
451+
operation: Operation::Append,
452+
additional_properties: HashMap::from([
453+
("added-files".to_string(), "5".to_string()),
454+
("added-records".to_string(), "100".to_string()),
455+
]),
456+
}),
457+
schema_id: Some(1),
458+
};
459+
460+
// Convert V1 to V2 - this should apply defaults for missing V2 fields
461+
let v2_snapshot: Snapshot = v1_snapshot.try_into().unwrap();
462+
463+
// Verify V1→V2 projection defaults are applied correctly
464+
assert_eq!(
465+
v2_snapshot.sequence_number(),
466+
0,
467+
"V1 snapshot sequence_number should default to 0"
468+
);
469+
470+
// Verify other fields are preserved correctly during conversion
471+
assert_eq!(v2_snapshot.snapshot_id(), 1234567890);
472+
assert_eq!(v2_snapshot.parent_snapshot_id(), Some(987654321));
473+
assert_eq!(v2_snapshot.timestamp_ms(), 1515100955770);
474+
assert_eq!(
475+
v2_snapshot.manifest_list(),
476+
"s3://bucket/manifest-list.avro"
477+
);
478+
assert_eq!(v2_snapshot.schema_id(), Some(1));
479+
assert_eq!(v2_snapshot.summary().operation, Operation::Append);
480+
assert_eq!(
481+
v2_snapshot
482+
.summary()
483+
.additional_properties
484+
.get("added-files"),
485+
Some(&"5".to_string())
486+
);
487+
}
488+
489+
#[test]
490+
fn test_snapshot_v1_to_v2_with_missing_summary() {
491+
use crate::spec::snapshot::_serde::SnapshotV1;
492+
493+
// Create a V1 snapshot without summary (should get default)
494+
let v1_snapshot = SnapshotV1 {
495+
snapshot_id: 1111111111,
496+
parent_snapshot_id: None,
497+
timestamp_ms: 1515100955770,
498+
manifest_list: Some("s3://bucket/manifest-list.avro".to_string()),
499+
manifests: None,
500+
summary: None, // V1 summary is optional
501+
schema_id: None,
502+
};
503+
504+
// Convert V1 to V2 - this should apply default summary
505+
let v2_snapshot: Snapshot = v1_snapshot.try_into().unwrap();
506+
507+
// Verify defaults are applied correctly
508+
assert_eq!(
509+
v2_snapshot.sequence_number(),
510+
0,
511+
"V1 snapshot sequence_number should default to 0"
512+
);
513+
assert_eq!(
514+
v2_snapshot.summary().operation,
515+
Operation::Append,
516+
"Missing V1 summary should default to Append operation"
517+
);
518+
assert!(
519+
v2_snapshot.summary().additional_properties.is_empty(),
520+
"Default summary should have empty additional_properties"
521+
);
522+
523+
// Verify other fields
524+
assert_eq!(v2_snapshot.snapshot_id(), 1111111111);
525+
assert_eq!(v2_snapshot.parent_snapshot_id(), None);
526+
assert_eq!(v2_snapshot.schema_id(), None);
527+
}
438528
}

0 commit comments

Comments
 (0)