@@ -1332,54 +1332,77 @@ func recordsToDataFiles(ctx context.Context, rootLocation string, meta *Metadata
13321332 yield (nil , err )
13331333 }
13341334 }
1335- currentSpec , err := meta .CurrentSpec ()
1336- if err != nil {
1337- return func (yield func (iceberg.DataFile , error ) bool ) {
1338- yield (nil , err )
1339- }
1340- }
1341- if currentSpec == nil {
1342- return func (yield func (iceberg.DataFile , error ) bool ) {
1343- yield (nil , fmt .Errorf ("cannot write files without a current spec: %w" , err ))
1344- }
1345- }
13461335
13471336 cw := newConcurrentDataFileWriter (func (rootLocation string , fs iceio.WriteFileIO , meta * MetadataBuilder , props iceberg.Properties , opts ... dataFileWriterOption ) (dataFileWriter , error ) {
13481337 return newDataFileWriter (rootLocation , fs , meta , props , opts ... )
13491338 })
1350- nextCount , stopCount := iter .Pull (args .counter )
1351- if currentSpec .IsUnpartitioned () {
1352- tasks := func (yield func (WriteTask ) bool ) {
1353- defer stopCount ()
13541339
1355- fileCount := 0
1356- for batch := range binPackRecords (args .itr , defaultBinPackLookback , targetFileSize ) {
1357- cnt , _ := nextCount ()
1358- fileCount ++
1359- t := WriteTask {
1360- Uuid : * args .writeUUID ,
1361- ID : cnt ,
1362- PartitionID : iceberg .UnpartitionedSpec .ID (),
1363- FileCount : fileCount ,
1364- Schema : taskSchema ,
1365- Batches : batch ,
1366- }
1367- if ! yield (t ) {
1368- return
1369- }
1370- }
1371- }
1340+ factory , err := newWriterFactory (rootLocation , args , meta , taskSchema , targetFileSize )
1341+ if err != nil {
1342+ panic (err )
1343+ }
13721344
1373- return cw .writeFiles (ctx , rootLocation , args .fs , meta , meta .props , nil , tasks )
1345+ if factory .currentSpec .IsUnpartitioned () {
1346+ return unpartitionedWrite (ctx , factory , args .itr )
13741347 }
13751348
1376- factory := NewWriterFactory (rootLocation , args , meta , taskSchema , targetFileSize )
1377- partitionWriter := newPartitionedFanoutWriter (* currentSpec , cw , meta .CurrentSchema (), args .itr , & factory )
1349+ partitionWriter := newPartitionedFanoutWriter (factory .currentSpec , cw , meta .CurrentSchema (), args .itr , factory )
13781350 workers := config .EnvConfig .MaxWorkers
13791351
13801352 return partitionWriter .Write (ctx , workers )
13811353}
13821354
1355+ func unpartitionedWrite (ctx context.Context , factory * writerFactory , records iter.Seq2 [arrow.RecordBatch , error ]) iter.Seq2 [iceberg.DataFile , error ] {
1356+ outputCh := make (chan iceberg.DataFile , 1 )
1357+ errCh := make (chan error , 1 )
1358+
1359+ go func () {
1360+ defer close (outputCh )
1361+ defer factory .stopCount ()
1362+
1363+ writer := factory .newRollingDataWriter (ctx , nil , "" , nil , outputCh )
1364+ for rec , err := range records {
1365+ if err != nil {
1366+ errCh <- err
1367+ close (errCh )
1368+ writer .close ()
1369+ writer .wg .Wait ()
1370+
1371+ return
1372+ }
1373+ if err := writer .Add (rec ); err != nil {
1374+ errCh <- err
1375+ close (errCh )
1376+ writer .close ()
1377+ writer .wg .Wait ()
1378+
1379+ return
1380+ }
1381+ }
1382+ close (writer .recordCh )
1383+ writer .wg .Wait ()
1384+ if err := <- writer .errorCh ; err != nil {
1385+ errCh <- err
1386+ }
1387+ close (errCh )
1388+ }()
1389+
1390+ return func (yield func (iceberg.DataFile , error ) bool ) {
1391+ defer func () {
1392+ for range outputCh {
1393+ }
1394+ }()
1395+ for df := range outputCh {
1396+ if ! yield (df , nil ) {
1397+ return
1398+ }
1399+ }
1400+ if err := <- errCh ; err != nil {
1401+ yield (nil , err )
1402+ }
1403+ }
1404+ }
1405+
13831406type partitionContext struct {
13841407 partitionData map [int ]any
13851408 specID int32
@@ -1448,8 +1471,14 @@ func positionDeleteRecordsToDataFiles(ctx context.Context, rootLocation string,
14481471
14491472 return cw .writeFiles (ctx , rootLocation , args .fs , meta , meta .props , nil , tasks )
14501473 }
1451- writerFactory := NewWriterFactory (rootLocation , args , meta , iceberg .PositionalDeleteSchema , targetFileSize )
1452- partitionWriter := newPositionDeletePartitionedFanoutWriter (latestMetadata , cw , partitionContextByFilePath , args .itr , & writerFactory )
1474+ factory , err := newWriterFactory (rootLocation , args , meta , iceberg .PositionalDeleteSchema , targetFileSize ,
1475+ withContentType (iceberg .EntryContentPosDeletes ),
1476+ withFactoryFileSchema (iceberg .PositionalDeleteSchema ))
1477+ if err != nil {
1478+ panic (err )
1479+ }
1480+
1481+ partitionWriter := newPositionDeletePartitionedFanoutWriter (latestMetadata , cw , partitionContextByFilePath , args .itr , factory )
14531482 workers := config .EnvConfig .MaxWorkers
14541483
14551484 return partitionWriter .Write (ctx , workers )
0 commit comments