@@ -21,7 +21,7 @@ use serde_derive::{Deserialize, Serialize};
21
21
use serde_with:: serde_as;
22
22
23
23
use super :: { Datum , ManifestEntry , Schema , Struct } ;
24
- use crate :: spec:: { Literal , RawLiteral , StructType , Type } ;
24
+ use crate :: spec:: { FormatVersion , Literal , RawLiteral , StructType , Type } ;
25
25
use crate :: { Error , ErrorKind } ;
26
26
27
27
#[ derive( Serialize , Deserialize ) ]
@@ -40,7 +40,7 @@ impl ManifestEntryV2 {
40
40
snapshot_id : value. snapshot_id ,
41
41
sequence_number : value. sequence_number ,
42
42
file_sequence_number : value. file_sequence_number ,
43
- data_file : DataFileSerde :: try_from ( value. data_file , partition_type, false ) ?,
43
+ data_file : DataFileSerde :: try_from ( value. data_file , partition_type, FormatVersion :: V2 ) ?,
44
44
} )
45
45
}
46
46
@@ -74,7 +74,7 @@ impl ManifestEntryV1 {
74
74
Ok ( Self {
75
75
status : value. status as i32 ,
76
76
snapshot_id : value. snapshot_id . unwrap_or_default ( ) ,
77
- data_file : DataFileSerde :: try_from ( value. data_file , partition_type, true ) ?,
77
+ data_file : DataFileSerde :: try_from ( value. data_file , partition_type, FormatVersion :: V1 ) ?,
78
78
} )
79
79
}
80
80
@@ -129,9 +129,13 @@ impl DataFileSerde {
129
129
pub fn try_from (
130
130
value : super :: DataFile ,
131
131
partition_type : & StructType ,
132
- is_version_1 : bool ,
132
+ format_version : FormatVersion ,
133
133
) -> Result < Self , Error > {
134
- let block_size_in_bytes = if is_version_1 { Some ( 0 ) } else { None } ;
134
+ let block_size_in_bytes = if format_version == FormatVersion :: V1 {
135
+ Some ( 0 )
136
+ } else {
137
+ None
138
+ } ;
135
139
Ok ( Self {
136
140
content : value. content as i32 ,
137
141
file_path : value. file_path ,
@@ -292,16 +296,23 @@ fn parse_i64_entry(v: Vec<I64Entry>) -> Result<HashMap<i32, u64>, Error> {
292
296
Ok ( m)
293
297
}
294
298
299
+ #[ allow( unused_mut) ]
295
300
fn to_i64_entry ( entries : HashMap < i32 , u64 > ) -> Result < Vec < I64Entry > , Error > {
296
- entries
301
+ let mut i64_entries = entries
297
302
. iter ( )
298
303
. map ( |e| {
299
304
Ok ( I64Entry {
300
305
key : * e. 0 ,
301
306
value : ( * e. 1 ) . try_into ( ) ?,
302
307
} )
303
308
} )
304
- . collect ( )
309
+ . collect :: < Result < Vec < _ > , Error > > ( ) ?;
310
+
311
+ // Ensure that the order is deterministic during testing
312
+ #[ cfg( test) ]
313
+ i64_entries. sort_by_key ( |e| e. key ) ;
314
+
315
+ Ok ( i64_entries)
305
316
}
306
317
307
318
#[ cfg( test) ]
@@ -432,4 +443,154 @@ mod tests {
432
443
433
444
assert_eq ! ( actual_data_file[ 0 ] . content, DataContentType :: Data )
434
445
}
446
+
447
/// Converts a hand-built `ManifestEntryV1` through its `try_into` and checks that the
/// resulting entry carries zeroed sequence numbers, keeps the snapshot id, and exposes
/// the expected data-file fields.
#[test]
fn test_manifest_entry_v1_to_v2_projection() {
    use crate::spec::manifest::_serde::{DataFileSerde, ManifestEntryV1};
    use crate::spec::{Literal, RawLiteral, Struct, StructType};

    // An empty partition tuple, encoded against an empty struct type.
    let raw_partition_type = Type::Struct(StructType::new(Vec::new()));
    let partition =
        RawLiteral::try_from(Literal::Struct(Struct::empty()), &raw_partition_type).unwrap();

    // Serde-level data file with every optional field left unset, except the block
    // size, which is populated here as part of the V1-shaped fixture.
    let serde_data_file = DataFileSerde {
        content: 0,
        file_path: String::from("test/path.parquet"),
        file_format: String::from("PARQUET"),
        partition,
        record_count: 100,
        file_size_in_bytes: 1024,
        block_size_in_bytes: Some(0),
        column_sizes: None,
        value_counts: None,
        null_value_counts: None,
        nan_value_counts: None,
        lower_bounds: None,
        upper_bounds: None,
        key_metadata: None,
        split_offsets: None,
        equality_ids: None,
        sort_order_id: None,
        first_row_id: None,
        referenced_data_file: None,
        content_offset: None,
        content_size_in_bytes: None,
    };

    // The V1 entry itself has no sequence-number fields to fill in.
    let v1_entry = ManifestEntryV1 {
        status: 1,
        snapshot_id: 12345,
        data_file: serde_data_file,
    };

    // Run the conversion under test.
    let partition_spec_id = 0;
    let partition_type = StructType::new(Vec::new());
    let table_schema = schema();
    let v2_entry = v1_entry
        .try_into(partition_spec_id, &partition_type, &table_schema)
        .unwrap();

    // Sequence numbers are filled in; the snapshot id survives the conversion.
    assert_eq!(
        v2_entry.sequence_number,
        Some(0),
        "ManifestEntryV1::try_into() should set sequence_number to 0"
    );
    assert_eq!(
        v2_entry.file_sequence_number,
        Some(0),
        "ManifestEntryV1::try_into() should set file_sequence_number to 0"
    );
    assert_eq!(
        v2_entry.snapshot_id,
        Some(12345),
        "snapshot_id should be preserved during conversion"
    );

    // Data-file level expectations: content type and equality ids.
    assert_eq!(
        v2_entry.data_file.content,
        DataContentType::Data,
        "DataFileSerde should convert content 0 to DataContentType::Data"
    );
    assert_eq!(
        v2_entry.data_file.equality_ids,
        Vec::<i32>::new(),
        "DataFileSerde should convert None equality_ids to empty vec"
    );

    // Plain fields pass through unchanged.
    assert_eq!(v2_entry.data_file.file_path, "test/path.parquet");
    assert_eq!(v2_entry.data_file.record_count, 100);
    assert_eq!(v2_entry.data_file.file_size_in_bytes, 1024);
}
530
+
531
/// Converts a hand-built `DataFileSerde` whose optional fields are unset through its
/// `try_into` and checks the content type, equality ids, and pass-through fields of
/// the resulting data file.
#[test]
fn test_data_file_serde_v1_field_defaults() {
    use crate::spec::manifest::_serde::DataFileSerde;
    use crate::spec::{Literal, RawLiteral, Struct, StructType};

    // An empty partition tuple, encoded against an empty struct type.
    let raw_partition_type = Type::Struct(StructType::new(Vec::new()));
    let partition =
        RawLiteral::try_from(Literal::Struct(Struct::empty()), &raw_partition_type).unwrap();

    // Fixture shaped like a V1 record: `content` is 0, `equality_ids` is absent, and a
    // block size is present.
    let v1_style_data_file = DataFileSerde {
        content: 0,
        file_path: String::from("test/data.parquet"),
        file_format: String::from("PARQUET"),
        partition,
        record_count: 500,
        file_size_in_bytes: 2048,
        block_size_in_bytes: Some(1024),
        column_sizes: None,
        value_counts: None,
        null_value_counts: None,
        nan_value_counts: None,
        lower_bounds: None,
        upper_bounds: None,
        key_metadata: None,
        split_offsets: None,
        equality_ids: None,
        sort_order_id: None,
        first_row_id: None,
        referenced_data_file: None,
        content_offset: None,
        content_size_in_bytes: None,
    };

    // Run the conversion under test.
    let partition_spec_id = 0;
    let partition_type = StructType::new(Vec::new());
    let table_schema = schema();
    let data_file = v1_style_data_file
        .try_into(partition_spec_id, &partition_type, &table_schema)
        .unwrap();

    // Unset or zero inputs map to the expected typed values.
    assert_eq!(
        data_file.content,
        DataContentType::Data,
        "content 0 should convert to DataContentType::Data"
    );
    assert_eq!(
        data_file.equality_ids,
        Vec::<i32>::new(),
        "None equality_ids should convert to empty vec via unwrap_or_default()"
    );

    // Remaining fields come through with the values supplied above.
    assert_eq!(data_file.file_path, "test/data.parquet");
    assert_eq!(data_file.file_format, DataFileFormat::Parquet);
    assert_eq!(data_file.record_count, 500);
    assert_eq!(data_file.file_size_in_bytes, 2048);
    assert_eq!(data_file.partition_spec_id, 0);
}
435
596
}
0 commit comments