@@ -481,12 +481,15 @@ type ManifestTestSuite struct {
481481}
482482
483483func (m * ManifestTestSuite ) writeManifestList () {
484- m .Require ().NoError (WriteManifestList (1 , & m .v1ManifestList , snapshotID , nil , nil , 0 , manifestFileRecordsV1 ))
484+ _ , err := WriteManifestList (1 , & m .v1ManifestList , snapshotID , nil , nil , 0 , manifestFileRecordsV1 )
485+ m .Require ().NoError (err )
485486 unassignedSequenceNum := int64 (- 1 )
486- m .Require ().NoError (WriteManifestList (2 , & m .v2ManifestList , snapshotID , nil , & unassignedSequenceNum , 0 , manifestFileRecordsV2 ))
487+ _ , err = WriteManifestList (2 , & m .v2ManifestList , snapshotID , nil , & unassignedSequenceNum , 0 , manifestFileRecordsV2 )
488+ m .Require ().NoError (err )
487489 v3SequenceNum := int64 (5 )
488490 firstRowID := int64 (1000 )
489- m .Require ().NoError (WriteManifestList (3 , & m .v3ManifestList , snapshotID , nil , & v3SequenceNum , firstRowID , manifestFileRecordsV3 ))
491+ _ , err = WriteManifestList (3 , & m .v3ManifestList , snapshotID , nil , & v3SequenceNum , firstRowID , manifestFileRecordsV3 )
492+ m .Require ().NoError (err )
490493}
491494
492495func (m * ManifestTestSuite ) writeManifestEntries () {
@@ -760,13 +763,75 @@ func (m *ManifestTestSuite) TestReadManifestListV3() {
760763 m .Nil (list [0 ].KeyMetadata ())
761764 m .Zero (list [0 ].PartitionSpecID ())
762765
766+ // V3 manifest list assigns first_row_id to data manifests
767+ m .Require ().NotNil (list [0 ].FirstRowId (), "v3 data manifest should have first_row_id" )
768+ m .EqualValues (1000 , * list [0 ].FirstRowId ())
769+
763770 part := list [0 ].Partitions ()[0 ]
764771 m .True (part .ContainsNull )
765772 m .False (* part .ContainsNaN )
766773 m .Equal ([]byte {0x01 , 0x00 , 0x00 , 0x00 }, * part .LowerBound )
767774 m .Equal ([]byte {0x02 , 0x00 , 0x00 , 0x00 }, * part .UpperBound )
768775}
769776
777+ func (m * ManifestTestSuite ) TestV3DataManifestFirstRowIDInheritance () {
778+ // Build a v3 data manifest with two entries that have null first_row_id.
779+ partitionSpec := NewPartitionSpecID (1 ,
780+ PartitionField {FieldID : 1000 , SourceID : 1 , Name : "x" , Transform : IdentityTransform {}})
781+ firstCount , secondCount := int64 (10 ), int64 (20 )
782+ entriesWithNullFirstRowID := []ManifestEntry {
783+ & manifestEntry {
784+ EntryStatus : EntryStatusADDED ,
785+ Snapshot : & entrySnapshotID ,
786+ Data : & dataFile {
787+ Content : EntryContentData ,
788+ Path : "/data/file1.parquet" ,
789+ Format : ParquetFile ,
790+ PartitionData : map [string ]any {"x" : int (1 )},
791+ RecordCount : firstCount ,
792+ FileSize : 1000 ,
793+ BlockSizeInBytes : 64 * 1024 ,
794+ FirstRowIDField : nil , // null so reader will inherit
795+ },
796+ },
797+ & manifestEntry {
798+ EntryStatus : EntryStatusADDED ,
799+ Snapshot : & entrySnapshotID ,
800+ Data : & dataFile {
801+ Content : EntryContentData ,
802+ Path : "/data/file2.parquet" ,
803+ Format : ParquetFile ,
804+ PartitionData : map [string ]any {"x" : int (2 )},
805+ RecordCount : secondCount ,
806+ FileSize : 2000 ,
807+ BlockSizeInBytes : 64 * 1024 ,
808+ FirstRowIDField : nil ,
809+ },
810+ },
811+ }
812+ var manifestBuf bytes.Buffer
813+ _ , err := WriteManifest ("/manifest.avro" , & manifestBuf , 3 , partitionSpec , testSchema , entrySnapshotID , entriesWithNullFirstRowID )
814+ m .Require ().NoError (err )
815+
816+ manifestFirstRowID := int64 (1000 )
817+ file := & manifestFile {
818+ version : 3 ,
819+ Path : "/manifest.avro" ,
820+ Content : ManifestContentData ,
821+ FirstRowID : & manifestFirstRowID ,
822+ }
823+ entries , err := ReadManifest (file , bytes .NewReader (manifestBuf .Bytes ()), false )
824+ m .Require ().NoError (err )
825+ m .Require ().Len (entries , 2 )
826+
827+ // First entry gets manifest's first_row_id
828+ m .Require ().NotNil (entries [0 ].DataFile ().FirstRowID ())
829+ m .EqualValues (1000 , * entries [0 ].DataFile ().FirstRowID ())
830+ // Second entry gets previous + previous file's record_count
831+ m .Require ().NotNil (entries [1 ].DataFile ().FirstRowID ())
832+ m .EqualValues (1000 + firstCount , * entries [1 ].DataFile ().FirstRowID ())
833+ }
834+
770835func (m * ManifestTestSuite ) TestReadManifestListIncompleteSchema () {
771836 // This prevents a regression that could be caused by using a schema cache
772837 // across multiple read/write operations of an avro file. While it may sound
@@ -779,7 +844,7 @@ func (m *ManifestTestSuite) TestReadManifestListIncompleteSchema() {
779844 // any cache. (Note: if working correctly, this will have no such side effect.)
780845 var buf bytes.Buffer
781846 seqNum := int64 (9876 )
782- err := WriteManifestList (2 , & buf , 1234 , nil , & seqNum , 0 , []ManifestFile {
847+ _ , err := WriteManifestList (2 , & buf , 1234 , nil , & seqNum , 0 , []ManifestFile {
783848 NewManifestFile (2 , "s3://bucket/namespace/table/metadata/abcd-0123.avro" , 99 , 0 , 1234 ).Build (),
784849 })
785850 m .NoError (err )
@@ -1550,7 +1615,7 @@ func (m *ManifestTestSuite) TestWriteManifestListClosesWriterOnError() {
15501615 m .Require ().NoError (writer .Close ())
15511616
15521617 out := & limitedWriter {limit : header .Len (), err : errLimitedWrite }
1553- err = WriteManifestList (2 , out , snapshotID , nil , & seqNum , 0 , []ManifestFile {
1618+ _ , err = WriteManifestList (2 , out , snapshotID , nil , & seqNum , 0 , []ManifestFile {
15541619 manifestFileRecordsV2 [0 ],
15551620 manifestFileRecordsV1 [0 ],
15561621 })
0 commit comments