1616//! ## Architecture
1717//!
1818//! - [`CheckpointWriter`] - Core component that manages the checkpoint creation workflow
19- //! - [`CheckpointDataIterator`] - Iterator over the checkpoint data to be written
19+ //! - [`ActionReconciliationIterator`] - Iterator over the checkpoint data to be written
2020//!
2121//! ## Usage
2222//!
3131//!
3232//! ```no_run
3333//! # use std::sync::Arc;
34- //! # use delta_kernel::checkpoint::CheckpointDataIterator ;
34+ //! # use delta_kernel::ActionReconciliationIterator ;
3535//! # use delta_kernel::checkpoint::CheckpointWriter;
3636//! # use delta_kernel::Engine;
3737//! # use delta_kernel::Snapshot;
4040//! # use delta_kernel::Error;
4141//! # use delta_kernel::FileMeta;
4242//! # use url::Url;
43- //! fn write_checkpoint_file(path: Url, data: &CheckpointDataIterator ) -> DeltaResult<FileMeta> {
43+ //! fn write_checkpoint_file(path: Url, data: &ActionReconciliationIterator ) -> DeltaResult<FileMeta> {
4444//! todo!() /* engine-specific logic to write data to object storage*/
4545//! }
4646//!
@@ -89,7 +89,7 @@ use std::sync::{Arc, LazyLock};
8989use crate :: action_reconciliation:: log_replay:: {
9090 ActionReconciliationBatch , ActionReconciliationProcessor ,
9191} ;
92- use crate :: action_reconciliation:: RetentionCalculator ;
92+ use crate :: action_reconciliation:: { ActionReconciliationIterator , RetentionCalculator } ;
9393use crate :: actions:: {
9494 Add , Metadata , Protocol , Remove , SetTransaction , Sidecar , ADD_NAME , CHECKPOINT_METADATA_NAME ,
9595 METADATA_NAME , PROTOCOL_NAME , REMOVE_NAME , SET_TRANSACTION_NAME , SIDECAR_NAME ,
@@ -145,43 +145,6 @@ static CHECKPOINT_METADATA_ACTION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(||
145145 ) ] ) )
146146} ) ;
147147
148- /// An iterator over the checkpoint data to be written to the file.
149- ///
150- /// This iterator yields filtered checkpoint data batches ([`FilteredEngineData`]) and
151- /// tracks action statistics required for finalizing the checkpoint.
152- ///
153- /// # Warning
154- /// The [`CheckpointDataIterator`] must be fully consumed to ensure proper collection of statistics for
155- /// the checkpoint. Additionally, all yielded data must be written to the specified path before calling
156- /// [`CheckpointWriter::finalize`]. Failing to do so may result in data loss or corruption.
157- pub struct CheckpointDataIterator {
158- /// The nested iterator that yields checkpoint batches with action counts
159- checkpoint_batch_iterator :
160- Box < dyn Iterator < Item = DeltaResult < ActionReconciliationBatch > > + Send > ,
161- /// Running total of actions included in the checkpoint
162- actions_count : i64 ,
163- /// Running total of add actions included in the checkpoint
164- add_actions_count : i64 ,
165- }
166-
167- impl Iterator for CheckpointDataIterator {
168- type Item = DeltaResult < FilteredEngineData > ;
169-
170- /// Advances the iterator and returns the next value.
171- ///
172- /// This implementation transforms the `ActionReconciliationBatch` items from the nested iterator into
173- /// [`FilteredEngineData`] items for the engine to write, while accumulating action counts from
174- /// each batch. The [`CheckpointDataIterator`] is passed back to the kernel on call to
175- /// [`CheckpointWriter::finalize`] for counts to be read and written to the `_last_checkpoint` file
176- fn next ( & mut self ) -> Option < Self :: Item > {
177- Some ( self . checkpoint_batch_iterator . next ( ) ?. map ( |batch| {
178- self . actions_count += batch. actions_count ;
179- self . add_actions_count += batch. add_actions_count ;
180- batch. filtered_data
181- } ) )
182- }
183- }
184-
185148/// Orchestrates the process of creating a checkpoint for a table.
186149///
187150/// The [`CheckpointWriter`] is the entry point for generating checkpoint data for a Delta table.
@@ -253,7 +216,7 @@ impl CheckpointWriter {
253216 /// # Parameters
254217 /// - `engine`: Implementation of [`Engine`] APIs.
255218 ///
256- /// # Returns: [`CheckpointDataIterator`] containing the checkpoint data
219+ /// # Returns: [`ActionReconciliationIterator`] containing the checkpoint data
257220 // This method is the core of the checkpoint generation process. It:
258221 // 1. Determines whether to write a V1 or V2 checkpoint based on the table's
259222 // `v2Checkpoints` feature support
@@ -262,7 +225,10 @@ impl CheckpointWriter {
262225 // 4. Chains the checkpoint metadata action if writing a V2 spec checkpoint
263226 // (i.e., if `v2Checkpoints` feature is supported by table)
264227 // 5. Generates the appropriate checkpoint path
265- pub fn checkpoint_data ( & self , engine : & dyn Engine ) -> DeltaResult < CheckpointDataIterator > {
228+ pub fn checkpoint_data (
229+ & self ,
230+ engine : & dyn Engine ,
231+ ) -> DeltaResult < ActionReconciliationIterator > {
266232 let is_v2_checkpoints_supported = self
267233 . snapshot
268234 . table_configuration ( )
@@ -284,12 +250,10 @@ impl CheckpointWriter {
284250 let checkpoint_metadata =
285251 is_v2_checkpoints_supported. then ( || self . create_checkpoint_metadata_batch ( engine) ) ;
286252
287- // Wrap the iterator in a CheckpointDataIterator to track action counts
288- Ok ( CheckpointDataIterator {
289- checkpoint_batch_iterator : Box :: new ( checkpoint_data. chain ( checkpoint_metadata) ) ,
290- actions_count : 0 ,
291- add_actions_count : 0 ,
292- } )
253+ // Wrap the iterator to track action counts
254+ Ok ( ActionReconciliationIterator :: new ( Box :: new (
255+ checkpoint_data. chain ( checkpoint_metadata) ,
256+ ) ) )
293257 }
294258
295259 /// Finalizes checkpoint creation by saving metadata about the checkpoint.
@@ -313,10 +277,10 @@ impl CheckpointWriter {
313277 self ,
314278 engine : & dyn Engine ,
315279 metadata : & FileMeta ,
316- mut checkpoint_data : CheckpointDataIterator ,
280+ checkpoint_data : ActionReconciliationIterator ,
317281 ) -> DeltaResult < ( ) > {
318282 // Ensure the checkpoint data iterator is fully exhausted
319- if checkpoint_data. checkpoint_batch_iterator . next ( ) . is_some ( ) {
283+ if ! checkpoint_data. is_exhausted ( ) {
320284 return Err ( Error :: checkpoint_write (
321285 "The checkpoint data iterator must be fully consumed and written to storage before calling finalize"
322286 ) ) ;
@@ -332,8 +296,8 @@ impl CheckpointWriter {
332296 let data = create_last_checkpoint_data (
333297 engine,
334298 self . version ,
335- checkpoint_data. actions_count ,
336- checkpoint_data. add_actions_count ,
299+ checkpoint_data. actions_count ( ) ,
300+ checkpoint_data. add_actions_count ( ) ,
337301 size_in_bytes,
338302 ) ;
339303
0 commit comments