@@ -59,15 +59,15 @@ trait SnapshotManagement { self: DeltaLog =>
59
59
60
60
@ volatile private [delta] var asyncUpdateTask : Future [Unit ] = _
61
61
62
+ /**
+  * Use ReentrantLock to allow us to call `lockInterruptibly`.
+  *
+  * Declared ahead of `currentSnapshot`: that field is initialized with `getSnapshotAtInit`,
+  * which runs its body inside `withSnapshotLockInterruptibly`, so the lock object has to be
+  * constructed before that initializer executes (fields are initialized in declaration order).
+  */
63
+ protected val snapshotLock = new ReentrantLock ()
64
+
62
65
/**
63
66
* Cached fileStatus for the latest CRC file seen in the deltaLog.
64
67
*/
65
68
@ volatile protected var lastSeenChecksumFileStatusOpt : Option [FileStatus ] = None
66
69
@ volatile protected var currentSnapshot : CapturedSnapshot = getSnapshotAtInit
67
70
68
- /** Use ReentrantLock to allow us to call `lockInterruptibly` */
69
- protected val snapshotLock = new ReentrantLock ()
70
-
71
71
/**
72
72
* Run `body` inside `snapshotLock` lock using `lockInterruptibly` so that the thread
73
73
* can be interrupted when waiting for the lock.
@@ -81,20 +81,6 @@ trait SnapshotManagement { self: DeltaLog =>
81
81
}
82
82
}
83
83
84
- /**
85
- * Get the LogSegment that will help in computing the Snapshot of the table at DeltaLog
86
- * initialization, or None if the directory was empty/missing.
87
- *
88
- * @param startingCheckpoint A checkpoint that we can start our listing from
89
- */
90
- protected def getLogSegmentFrom (
91
- startingCheckpoint : Option [LastCheckpointInfo ]): Option [LogSegment ] = {
92
- getLogSegmentForVersion(
93
- versionToLoad = None ,
94
- lastCheckpointInfo = startingCheckpoint
95
- )
96
- }
97
-
98
84
/** Get an iterator of files in the _delta_log directory starting with the startVersion. */
99
85
private [delta] def listFrom (startVersion : Long ): Iterator [FileStatus ] = {
100
86
store.listFrom(listingPrefix(logPath, startVersion), newDeltaHadoopConf())
@@ -231,11 +217,11 @@ trait SnapshotManagement { self: DeltaLog =>
231
217
* @return Some LogSegment to build a Snapshot if files do exist after the given
232
218
* startCheckpoint. None, if the directory was missing or empty.
233
219
*/
234
- protected def getLogSegmentForVersion (
220
+ protected def createLogSegment (
235
221
versionToLoad : Option [Long ] = None ,
236
222
oldCheckpointProviderOpt : Option [UninitializedCheckpointProvider ] = None ,
237
- lastCheckpointInfo : Option [LastCheckpointInfo ] = None ,
238
- commitStoreOpt : Option [CommitStore ] = None ): Option [LogSegment ] = {
223
+ commitStoreOpt : Option [CommitStore ] = None ,
224
+ lastCheckpointInfo : Option [LastCheckpointInfo ] = None ): Option [LogSegment ] = {
239
225
// List based on the last known checkpoint version.
240
226
// if that is -1, list from version 0L
241
227
val lastCheckpointVersion = getCheckpointVersion(lastCheckpointInfo, oldCheckpointProviderOpt)
@@ -253,6 +239,12 @@ trait SnapshotManagement { self: DeltaLog =>
253
239
)
254
240
}
255
241
242
/**
 * Convenience overload that builds a log segment relative to an existing snapshot.
 *
 * The listing is seeded with the checkpoint provider and the commit store of
 * `previousSnapshot`; every other argument of the general overload keeps its default.
 *
 * @param previousSnapshot the snapshot whose checkpoint provider and commit store are reused
 * @return the log segment, or None when the general overload finds nothing to build from
 */
private def createLogSegment(previousSnapshot: Snapshot): Option[LogSegment] = {
  val priorCheckpointProvider = previousSnapshot.checkpointProvider
  val priorCommitStoreOpt = previousSnapshot.commitStoreOpt
  createLogSegment(
    oldCheckpointProviderOpt = Some(priorCheckpointProvider),
    commitStoreOpt = priorCommitStoreOpt)
}
247
+
256
248
/**
257
249
* Returns the last known checkpoint version based on [[LastCheckpointInfo ]] or
258
250
* [[CheckpointProvider ]].
@@ -324,7 +316,7 @@ trait SnapshotManagement { self: DeltaLog =>
324
316
// deleting files. Either way, we can't safely continue.
325
317
//
326
318
// For now, we preserve existing behavior by returning Array.empty, which will trigger a
327
- // recursive call to [[getLogSegmentForVersion ]] below.
319
+ // recursive call to [[createLogSegment ]] below.
328
320
Array .empty[FileStatus ]
329
321
}
330
322
@@ -335,7 +327,7 @@ trait SnapshotManagement { self: DeltaLog =>
335
327
} else if (newFiles.isEmpty) {
336
328
// The directory may be deleted and recreated and we may have stale state in our DeltaLog
337
329
// singleton, so try listing from the first version
338
- return getLogSegmentForVersion (versionToLoad = versionToLoad)
330
+ return createLogSegment (versionToLoad = versionToLoad)
339
331
}
340
332
val (checkpoints, deltasAndCompactedDeltas) = newFiles.partition(isCheckpointFile)
341
333
val (deltas, compactedDeltas) = deltasAndCompactedDeltas.partition(isDeltaFile)
@@ -498,30 +490,20 @@ trait SnapshotManagement { self: DeltaLog =>
498
490
* file as a hint on where to start listing the transaction log directory. If the _delta_log
499
491
* directory doesn't exist, this method will return an `InitialSnapshot`.
500
492
*/
501
- protected def getSnapshotAtInit : CapturedSnapshot = {
493
// The whole initialization runs inside `withSnapshotLockInterruptibly`, i.e. while holding
// `snapshotLock`.
+ protected def getSnapshotAtInit : CapturedSnapshot = withSnapshotLockInterruptibly {
502
494
recordFrameProfile(" Delta" , " SnapshotManagement.getSnapshotAtInit" ) {
503
- val currentTimestamp = clock.getTimeMillis()
495
// Read the clock before any listing work; this value becomes the timestamp stored in the
// returned CapturedSnapshot.
+ val snapshotInitWallclockTime = clock.getTimeMillis()
504
496
val lastCheckpointOpt = readLastCheckpointFile()
505
- createSnapshotAtInitInternal(
506
- initSegment = getLogSegmentFrom(lastCheckpointOpt),
507
- timestamp = currentTimestamp
508
- )
509
- }
510
- }
511
-
512
- protected def createSnapshotAtInitInternal (
513
- initSegment : Option [LogSegment ],
514
- timestamp : Long ): CapturedSnapshot = {
515
- val snapshot = initSegment.map { segment =>
516
- val snapshot = createSnapshot(
517
- initSegment = segment,
518
- checksumOpt = None )
519
- snapshot
520
- }.getOrElse {
521
- logInfo(s " Creating initial snapshot without metadata, because the directory is empty " )
522
- new InitialSnapshot (logPath, this )
497
// Use the last-checkpoint file contents (if any) as the hint for where to start listing.
+ val initialSegmentForNewSnapshot = createLogSegment(
498
+ versionToLoad = None ,
499
+ lastCheckpointInfo = lastCheckpointOpt)
500
// At init time there is no previous snapshot and no commit store was used for the listing,
// hence the `None` arguments; the call is always synchronous.
+ val snapshot = getUpdatedSnapshot(
501
+ oldSnapshotOpt = None ,
502
+ initialSegmentForNewSnapshot = initialSegmentForNewSnapshot,
503
+ initialCommitStore = None ,
504
+ isAsync = false )
505
+ CapturedSnapshot (snapshot, snapshotInitWallclockTime)
523
506
}
524
- CapturedSnapshot (snapshot, timestamp)
525
507
}
526
508
527
509
/**
@@ -696,7 +678,7 @@ trait SnapshotManagement { self: DeltaLog =>
696
678
* Instead, just do a general update to the latest available version. The racing commits
697
679
* can then use the version check short-circuit to avoid constructing a new snapshot.
698
680
*/
699
- getLogSegmentForVersion (
681
+ createLogSegment (
700
682
oldCheckpointProviderOpt = Some (oldCheckpointProvider),
701
683
commitStoreOpt = commitStoreOpt
702
684
).getOrElse {
@@ -907,48 +889,95 @@ trait SnapshotManagement { self: DeltaLog =>
907
889
*/
908
890
protected def updateInternal (isAsync : Boolean ): Snapshot =
909
891
recordDeltaOperation(this , " delta.log.update" , Map (TAG_ASYNC -> isAsync.toString)) {
910
- val updateTimestamp = clock.getTimeMillis()
892
// Read the clock before listing; this value is later handed to installSnapshot as the
// update timestamp of the installed snapshot.
+ val updateStartTimeMs = clock.getTimeMillis()
911
893
val previousSnapshot = currentSnapshot.snapshot
912
- val segmentOpt = getLogSegmentForVersion(
913
- oldCheckpointProviderOpt = Some (previousSnapshot.checkpointProvider),
914
- commitStoreOpt = previousSnapshot.commitStoreOpt)
915
- installLogSegmentInternal(previousSnapshot, segmentOpt, updateTimestamp, isAsync)
894
// List relative to the previous snapshot's checkpoint provider and commit store.
+ val segmentOpt = createLogSegment(previousSnapshot)
895
// getUpdatedSnapshot only builds and returns the snapshot; publishing it as
// `currentSnapshot` is done by installSnapshot below.
+ val newSnapshot = getUpdatedSnapshot(
896
+ oldSnapshotOpt = Some (previousSnapshot),
897
+ initialSegmentForNewSnapshot = segmentOpt,
898
+ initialCommitStore = previousSnapshot.commitStoreOpt,
899
+ isAsync = isAsync)
900
+ installSnapshot(newSnapshot, updateStartTimeMs)
901
+ }
902
+
903
/**
 * Builds the snapshot for `initialSegmentForNewSnapshot` and returns it.
 *
 * This method does not assign `currentSnapshot`; callers that want the result published
 * pass it to `installSnapshot` themselves.
 *
 * If the resulting snapshot reports a commit store different from `initialCommitStore`, the
 * log segment is re-created from that snapshot (so that the new commit store is consulted)
 * and a second snapshot is built from it. This is done at most once.
 *
 * @param oldSnapshotOpt The previous snapshot, if any.
 * @param initialSegmentForNewSnapshot the log segment constructed for the new snapshot
 * @param initialCommitStore the Commit Store used for constructing the
 *                           `initialSegmentForNewSnapshot`
 * @param isAsync Whether the update is async.
 * @return The new snapshot.
 */
protected def getUpdatedSnapshot(
    oldSnapshotOpt: Option[Snapshot],
    initialSegmentForNewSnapshot: Option[LogSegment],
    initialCommitStore: Option[CommitStore],
    isAsync: Boolean): Snapshot = {
  val firstSnapshot = getSnapshotForLogSegmentInternal(
    oldSnapshotOpt,
    initialSegmentForNewSnapshot,
    isAsync
  )
  // A snapshot with a negative version is skipped here: the refresh is only attempted when
  // `version >= 0`.
  val commitStoreChanged =
    firstSnapshot.version >= 0 && firstSnapshot.commitStoreOpt != initialCommitStore
  if (commitStoreChanged) {
    // The commit store has changed, so list again to pick up the latest commits from the new
    // commit store. We need to do it only once as the delta spec mandates the commit which
    // changes the commit owner to be backfilled.
    val refreshedSegmentOpt = createLogSegment(firstSnapshot)
    getSnapshotForLogSegmentInternal(Some(firstSnapshot), refreshedSegmentOpt, isAsync)
  } else {
    firstSnapshot
  }
}
917
934
918
- /** Install the provided segmentOpt as the currentSnapshot on the cluster */
919
- protected def installLogSegmentInternal (
920
- previousSnapshot : Snapshot ,
935
+ /**
+  * Creates a Snapshot for the given `segmentOpt`. The snapshot is only returned; this method
+  * does not assign `currentSnapshot`.
+  *
+  *  - When `segmentOpt` is empty, a fresh `InitialSnapshot` for `logPath` is returned.
+  *  - When `previousSnapshotOpt` holds a snapshot whose `logSegment` equals the segment, that
+  *    very snapshot instance is returned and nothing new is built.
+  *  - Otherwise a new snapshot is built with `createSnapshot`; if a previous snapshot was
+  *    supplied, `logMetadataTableIdChange` is invoked with the previous and the new snapshot.
+  *
+  * @param previousSnapshotOpt the snapshot to compare against and reuse when unchanged, if any
+  * @param segmentOpt the log segment to build the snapshot from, or None
+  * @param isAsync NOTE(review): not referenced anywhere in this body -- confirm whether the
+  *                parameter is still needed
+  * @return the reused previous snapshot, a newly created snapshot, or an `InitialSnapshot`
+  */
936
+ protected def getSnapshotForLogSegmentInternal (
937
+ previousSnapshotOpt : Option [ Snapshot ] ,
921
938
segmentOpt : Option [LogSegment ],
922
- updateTimestamp : Long ,
923
939
isAsync : Boolean ): Snapshot = {
924
940
segmentOpt.map { segment =>
925
- if (segment == previousSnapshot.logSegment) {
926
- // If no changes were detected, just refresh the timestamp
927
- val timestampToUse = math.max(updateTimestamp, currentSnapshot.updateTimestamp)
928
- currentSnapshot = currentSnapshot.copy(updateTimestamp = timestampToUse)
941
// Unchanged log segment: hand back the previous snapshot instance itself. The `.get` is
// safe because `exists` just returned true.
+ if (previousSnapshotOpt.exists(_.logSegment == segment)) {
942
+ previousSnapshotOpt.get
929
943
} else {
930
944
val newSnapshot = createSnapshot(
931
945
initSegment = segment,
932
946
checksumOpt = None )
933
- logMetadataTableIdChange(previousSnapshot , newSnapshot)
947
+ previousSnapshotOpt.foreach( logMetadataTableIdChange(_ , newSnapshot) )
934
948
logInfo(s " Updated snapshot to $newSnapshot" )
935
- replaceSnapshot( newSnapshot, updateTimestamp)
949
+ newSnapshot
936
950
}
937
951
}.getOrElse {
938
- logInfo(s " No delta log found for the Delta table at $logPath " )
939
- replaceSnapshot( new InitialSnapshot (logPath, this ), updateTimestamp )
952
+ logInfo(s " Creating initial snapshot without metadata, because the directory is empty " )
953
+ new InitialSnapshot (logPath, this )
940
954
}
941
- currentSnapshot.snapshot
942
955
}
943
956
944
- /** Replace the given snapshot with the provided one. */
945
- protected def replaceSnapshot (newSnapshot : Snapshot , updateTimestamp : Long ): Unit = {
957
+ /**
+  * Installs the given `newSnapshot` as the `currentSnapshot` and returns it.
+  *
+  * If the calling thread does not hold `snapshotLock`, a RuntimeException is thrown when
+  * `Utils.isTesting` is true; otherwise a "delta.update.unsafeReplace" event is recorded and
+  * the installation proceeds anyway.
+  *
+  * Three cases are distinguished:
+  *  - `currentSnapshot` is still null: the snapshot is stored as-is and nothing is uncached.
+  *  - `newSnapshot` is the same instance (`eq`) as the installed one: only the timestamp is
+  *    refreshed, to the larger of `updateTimestamp` and the previously stored timestamp.
+  *  - otherwise: the new snapshot replaces the old one and the old one is uncached.
+  *
+  * @param newSnapshot the snapshot to install
+  * @param updateTimestamp the timestamp to record alongside the installed snapshot
+  * @return `newSnapshot`
+  */
958
+ protected def installSnapshot (newSnapshot : Snapshot , updateTimestamp : Long ): Snapshot = {
946
959
if (! snapshotLock.isHeldByCurrentThread) {
960
+ if (Utils .isTesting) {
961
+ throw new RuntimeException (" DeltaLog snapshot replaced without taking lock" )
962
+ }
947
963
recordDeltaEvent(this , " delta.update.unsafeReplace" )
948
964
}
949
- val oldSnapshot = currentSnapshot.snapshot
950
- currentSnapshot = CapturedSnapshot (newSnapshot, updateTimestamp)
951
- oldSnapshot.uncache()
965
+ if (currentSnapshot == null ) {
966
+ // cold snapshot initialization: nothing is installed yet, so there is no old snapshot to uncache
967
+ currentSnapshot = CapturedSnapshot (newSnapshot, updateTimestamp)
968
+ return newSnapshot
969
+ }
970
+ val CapturedSnapshot (oldSnapshot, oldTimestamp) = currentSnapshot
971
+ if (oldSnapshot eq newSnapshot) {
972
+ // Same snapshot as before, so just refresh the timestamp (never moving it backwards)
973
+ val timestampToUse = math.max(updateTimestamp, oldTimestamp)
974
+ currentSnapshot = CapturedSnapshot (newSnapshot, timestampToUse)
975
+ } else {
976
+ // Install the new snapshot and uncache the old one
977
+ currentSnapshot = CapturedSnapshot (newSnapshot, updateTimestamp)
978
+ oldSnapshot.uncache()
979
+ }
980
+ newSnapshot
952
981
}
953
982
954
983
/** Log a change in the metadata's table id whenever we install a newer version of a snapshot */
@@ -1022,8 +1051,7 @@ trait SnapshotManagement { self: DeltaLog =>
1022
1051
committedVersion)
1023
1052
logMetadataTableIdChange(previousSnapshot, newSnapshot)
1024
1053
logInfo(s " Updated snapshot to $newSnapshot" )
1025
- replaceSnapshot(newSnapshot, updateTimestamp)
1026
- currentSnapshot.snapshot
1054
+ installSnapshot(newSnapshot, updateTimestamp)
1027
1055
}
1028
1056
}
1029
1057
@@ -1045,7 +1073,7 @@ trait SnapshotManagement { self: DeltaLog =>
1045
1073
// fallback to the other overload.
1046
1074
return getSnapshotAt(version)
1047
1075
}
1048
- val segment = getLogSegmentForVersion (
1076
+ val segment = createLogSegment (
1049
1077
versionToLoad = Some (version),
1050
1078
oldCheckpointProviderOpt = Some (lastCheckpointProvider)
1051
1079
).getOrElse {
@@ -1073,7 +1101,7 @@ trait SnapshotManagement { self: DeltaLog =>
1073
1101
.collect { case ci if ci.version <= version => ci }
1074
1102
.orElse(findLastCompleteCheckpointBefore(version))
1075
1103
.map(manuallyLoadCheckpoint)
1076
- getLogSegmentForVersion (
1104
+ createLogSegment (
1077
1105
versionToLoad = Some (version),
1078
1106
lastCheckpointInfo = lastCheckpointInfoHint
1079
1107
).map { segment =>
@@ -1085,6 +1113,9 @@ trait SnapshotManagement { self: DeltaLog =>
1085
1113
throw DeltaErrors .emptyDirectoryException(logPath.toString)
1086
1114
}
1087
1115
}
1116
+
1117
/** Returns the `CapturedSnapshot` currently held in `currentSnapshot`. Visible for testing. */
private[delta] def getCapturedSnapshot(): CapturedSnapshot = {
  currentSnapshot
}
1088
1119
}
1089
1120
1090
1121
object SnapshotManagement {
0 commit comments