@@ -151,6 +151,8 @@ import Synchronization
151151///
152152/// ### Persistence
153153/// - ``cache()``
154+ /// - ``checkpoint(_:_:_:)``
155+ /// - ``localCheckpoint(_:_:)``
154156/// - ``persist(storageLevel:)``
155157/// - ``unpersist(blocking:)``
156158/// - ``storageLevel``
@@ -1407,6 +1409,41 @@ public actor DataFrame: Sendable {
14071409 try await spark. client. createTempView ( self . plan. root, viewName, replace: replace, isGlobal: global)
14081410 }
14091411
1412+ /// Eagerly checkpoint a ``DataFrame`` and return the new ``DataFrame``.
1413+ /// Checkpointing can be used to truncate the logical plan of this ``DataFrame``,
1414+ /// which is especially useful in iterative algorithms where the plan may grow exponentially.
1415+ /// It will be saved to files inside the checkpoint directory.
1416+ /// - Parameters:
1417+ /// - eager: Whether to checkpoint this dataframe immediately
1418+ /// - reliableCheckpoint: Whether to create a reliable checkpoint saved to files inside the checkpoint directory.
1419+ /// If false creates a local checkpoint using the caching subsystem
1420+ /// - storageLevel: StorageLevel with which to checkpoint the data.
1421+ /// - Returns: A ``DataFrame``.
1422+ public func checkpoint(
1423+ _ eager: Bool = true ,
1424+ _ reliableCheckpoint: Bool = true ,
1425+ _ storageLevel: StorageLevel ? = nil
1426+ ) async throws -> DataFrame {
1427+ let plan = try await spark. client. getCheckpoint ( self . plan. root, eager, reliableCheckpoint, storageLevel)
1428+ return DataFrame ( spark: self . spark, plan: plan)
1429+ }
1430+
1431+ /// Locally checkpoints a ``DataFrame`` and return the new ``DataFrame``.
1432+ /// Checkpointing can be used to truncate the logical plan of this ``DataFrame``,
1433+ /// which is especially useful in iterative algorithms where the plan may grow exponentially.
1434+ /// Local checkpoints are written to executor storage and despite potentially faster they
1435+ /// are unreliable and may compromise job completion.
1436+ /// - Parameters:
1437+ /// - eager: Whether to checkpoint this dataframe immediately
1438+ /// - storageLevel: StorageLevel with which to checkpoint the data.
1439+ /// - Returns: A ``DataFrame``.
1440+ public func localCheckpoint(
1441+ _ eager: Bool = true ,
1442+ _ storageLevel: StorageLevel ? = nil
1443+ ) async throws -> DataFrame {
1444+ try await checkpoint ( eager, false , storageLevel)
1445+ }
1446+
14101447 /// Returns a ``DataFrameWriter`` that can be used to write non-streaming data.
14111448 public var write : DataFrameWriter {
14121449 get {
0 commit comments