@@ -470,8 +470,8 @@ func mapReplicaTypeToVoterOrNonVoter(rType roachpb.ReplicaType) roachpb.ReplicaT
470470//
471471// Some the state inside each *pendingReplicaChange is mutable at arbitrary
472472// points in time by the code inside this package (with the relevant locking,
473- // of course). Currently, this state is revisedGCTime , enactedAtTime. Neither
474- // of it is read by the public methods on PendingRangeChange.
473+ // of course). Currently, this state is gcTime , enactedAtTime. Neither of it
474+ // is read by the public methods on PendingRangeChange.
475475//
476476// TODO(sumeer): when we expand the set of mutable fields, make a deep copy.
477477type PendingRangeChange struct {
@@ -714,29 +714,23 @@ func (prc PendingRangeChange) LeaseTransferFrom() roachpb.StoreID {
714714// change. It may not be enacted if it will cause some invariant (like the
715715// number of replicas, or having a leaseholder) to be violated. If not
716716// enacted, the allocator will either be told about the lack of enactment, or
717- // will eventually expire from the allocator's state after
718- // pendingChangeGCDuration or revisedGCTime. Such expiration without enactment
719- // should be rare. pendingReplicaChanges can be paired, when a range is being
720- // moved from one store to another -- that pairing is not captured here, and
721- // captured in the changes suggested by the allocator to the external entity.
717+ // will eventually expire from the allocator's state at gcTime. Such
718+ // expiration without enactment should be rare. pendingReplicaChanges can be
719+ // paired, when a range is being moved from one store to another -- that
720+ // pairing is not captured here, and captured in the changes suggested by the
721+ // allocator to the external entity.
722722type pendingReplicaChange struct {
723723 changeID
724724 ReplicaChange
725725
726726 // The wall time at which this pending change was initiated. Used for
727- // expiry.
727+ // expiry. All replica changes in a PendingRangeChange have the same
728+ // startTime.
728729 startTime time.Time
729- // revisedGCTime is optionally populated (the zero value represents no
730- // revision). When populated, it represents a time, which, if earlier than
731- // the GC time decided by startTime + pendingChangeGCDuration, will cause an
732- // earlier GC. It is used to hasten GC (for the remaining changes) when some
733- // subset of changes corresponding to the same complex change have been
734- // observed to be enacted.
735- //
736- // The GC of these changes happens on a different path than the usual GC,
737- // which can undo the changes -- this GC happens only when processing a
738- // RangeMsg from the leaseholder.
739- revisedGCTime time.Time
730+ // gcTime represents a time when the unenacted change should be GC'd, either
731+ // using the normal GC undo path, or if rangeState.pendingChangeNoRollback
732+ // is true, when processing a RangeMsg from the leaseholder.
733+ gcTime time.Time
740734
741735 // TODO(kvoli,sumeerbhola): Consider adopting an explicit expiration time,
742736 // after which the change is considered to have been rejected. This would
@@ -1100,7 +1094,7 @@ type rangeState struct {
11001094 // - The pending change failed to apply via
11011095 // AdjustPendingChangesDisposition(failed)).
11021096 // - The pending change is garbage collected after this pending change has
1103- // been created for pendingChangeGCDuration .
1097+ // been created for pending{Replica,Lease}ChangeGCDuration .
11041098 //
11051099 // 3. Dropped due to incompatibility: mma creates these pending changes while
11061100 // working with an earlier authoritative leaseholder message. These changes
@@ -1550,20 +1544,25 @@ func (cs *clusterState) processStoreLeaseholderMsgInternal(
15501544 // Note that normal GC will not GC these, since normal GC needs to undo,
15511545 // and we are not allowed to undo these.
15521546 if len (remainingChanges ) > 0 {
1553- startTime := remainingChanges [0 ].startTime
1554- revisedGCTime := remainingChanges [0 ].revisedGCTime
1555- if startTime .Add (pendingChangeGCDuration ).Before (now ) || revisedGCTime .Before (now ) {
1547+ gcTime := remainingChanges [0 ].gcTime
1548+ if gcTime .Before (now ) {
15561549 gcRemainingChanges = true
15571550 }
15581551 }
1559- } else if len (enactedChanges ) > 0 {
1560- // First time this set of changes is seeing something enacted.
1552+ } else if len (enactedChanges ) > 0 && len (remainingChanges ) > 0 {
1553+ // First time this set of changes is seeing something enacted, and there
1554+ // are remaining changes.
15611555 //
15621556 // No longer permitted to rollback.
15631557 rs .pendingChangeNoRollback = true
1564- for _ , change := range remainingChanges {
1565- // Potentially shorten when the GC happens.
1566- change .revisedGCTime = now .Add (partiallyEnactedGCDuration )
1558+ // All remaining changes have the same gcTime.
1559+ curGCTime := remainingChanges [0 ].gcTime
1560+ revisedGCTime := now .Add (partiallyEnactedGCDuration )
1561+ if revisedGCTime .Before (curGCTime ) {
1562+ // Shorten when the GC happens.
1563+ for _ , change := range remainingChanges {
1564+ change .gcTime = revisedGCTime
1565+ }
15671566 }
15681567 }
15691568 // rs.pendingChanges is the union of remainingChanges and enactedChanges.
@@ -1865,17 +1864,21 @@ func (cs *clusterState) processStoreLeaseholderMsgInternal(
18651864
18661865}
18671866
1868- // If the pending change does not happen within this GC duration, we
1867+ // If the pending replica change does not happen within this GC duration, we
18691868// forget it in the data-structure.
1870- const pendingChangeGCDuration = 5 * time .Minute
1869+ const pendingReplicaChangeGCDuration = 5 * time .Minute
1870+
1871+ // If the pending lease transfer does not happen within this GC duration, we
1872+ // forget it in the data-structure.
1873+ const pendingLeaseTransferGCDuration = 1 * time .Minute
18711874
18721875// partiallyEnactedGCDuration is the duration after which a pending change is
18731876// GC'd if some other change that was part of the same decision has been
18741877// enacted, and this duration has elapsed since that enactment. This is
1875- // shorter than the normal pendingChangeGCDuration , since we want to clean up
1876- // such partially enacted changes faster. Long-running decisions are those
1877- // that involve a new range snapshot being sent, and that is the first change
1878- // that is seen as enacted. Subsequent ones should be fast.
1878+ // shorter than the normal pendingReplicaChangeGCDuration , since we want to
1879+ // clean up such partially enacted changes faster. Long-running decisions are
1880+ // those that involve a new range snapshot being sent, and that is the first
1881+ // change that is seen as enacted. Subsequent ones should be fast.
18791882const partiallyEnactedGCDuration = 30 * time .Second
18801883
18811884// Called periodically by allocator.
@@ -1904,11 +1907,8 @@ func (cs *clusterState) gcPendingChanges(now time.Time) {
19041907 if len (rs .pendingChanges ) == 0 {
19051908 panic (errors .AssertionFailedf ("no pending changes in range %v" , rangeID ))
19061909 }
1907- startTime := rs .pendingChanges [0 ].startTime
1908- // NB: we don't bother looking at revisedGCTime, since in that case
1909- // rangeState.pendingChangeNoRollback is set to true, so we can't do GC
1910- // here (since it requires undo).
1911- if ! startTime .Add (pendingChangeGCDuration ).Before (now ) {
1910+ gcTime := rs .pendingChanges [0 ].gcTime
1911+ if ! gcTime .Before (now ) {
19121912 continue
19131913 }
19141914 if err := cs .preCheckOnUndoReplicaChanges (PendingRangeChange {
@@ -2013,6 +2013,11 @@ func (cs *clusterState) addPendingRangeChange(change PendingRangeChange) {
20132013 // NB: rs != nil is also required, but we also check that in a method called
20142014 // below.
20152015
2016+ gcDuration := pendingReplicaChangeGCDuration
2017+ if change .IsTransferLease () {
2018+ // Only the lease is being transferred.
2019+ gcDuration = pendingLeaseTransferGCDuration
2020+ }
20162021 pendingChanges := change .pendingReplicaChanges
20172022 now := cs .ts .Now ()
20182023 for _ , pendingChange := range pendingChanges {
@@ -2021,6 +2026,7 @@ func (cs *clusterState) addPendingRangeChange(change PendingRangeChange) {
20212026 cid := cs .changeSeqGen
20222027 pendingChange .changeID = cid
20232028 pendingChange .startTime = now
2029+ pendingChange .gcTime = now .Add (gcDuration )
20242030 pendingChange .enactedAtTime = time.Time {}
20252031 storeState := cs .stores [pendingChange .target .StoreID ]
20262032 rangeState := cs .ranges [rangeID ]
0 commit comments