@@ -42,6 +42,8 @@ type rebalanceState struct {
4242 maxLeaseTransferCount int
4343 // lastFailedChangeDelayDuration is the delay after a failed change before retrying.
4444 lastFailedChangeDelayDuration time.Duration
45+ // now is the timestamp when rebalancing started.
46+ now time.Time
4547 // Scratch variables reused across iterations.
4648 scratch struct {
4749 disj [1 ]constraintsConj
@@ -178,20 +180,21 @@ func (cs *clusterState) rebalanceStores(
178180 maxRangeMoveCount : maxRangeMoveCount ,
179181 maxLeaseTransferCount : maxLeaseTransferCount ,
180182 lastFailedChangeDelayDuration : lastFailedChangeDelayDuration ,
183+ now : now ,
181184 }
182185 rs .scratch .nodes = map [roachpb.NodeID ]* NodeLoad {}
183186 rs .scratch .stores = map [roachpb.StoreID ]struct {}{}
184187 for _ , store := range sheddingStores {
185188 if rs .rangeMoveCount >= rs .maxRangeMoveCount || rs .leaseTransferCount >= rs .maxLeaseTransferCount {
186189 break
187190 }
188- rs .rebalanceStore (store , ctx , localStoreID , now )
191+ rs .rebalanceStore (store , ctx , localStoreID )
189192 }
190193 return rs .changes
191194}
192195
193196func (rs * rebalanceState ) rebalanceStore (
194- store sheddingStore , ctx context.Context , localStoreID roachpb.StoreID , now time.Time ,
197+ store sheddingStore , ctx context.Context , localStoreID roachpb.StoreID ,
195198) {
196199 log .KvDistribution .Infof (ctx , "start processing shedding store s%d: cpu node load %s, store load %s, worst dim %s" ,
197200 store .StoreID , store .nls , store .sls , store .worstDim )
@@ -226,7 +229,7 @@ func (rs *rebalanceState) rebalanceStore(
226229 // behalf of a particular store (vs. being called on behalf of the set
227230 // of local store IDs)?
228231 if ss .StoreID == localStoreID && store .dimSummary [CPURate ] >= overloadSlow {
229- shouldSkipReplicaMoves := rs .rebalanceLeases (ss , store , ctx , localStoreID , now )
232+ shouldSkipReplicaMoves := rs .rebalanceLeases (ss , store , ctx , localStoreID )
230233 if shouldSkipReplicaMoves {
231234 return
232235 }
@@ -236,19 +239,18 @@ func (rs *rebalanceState) rebalanceStore(
236239 }
237240
238241 log .KvDistribution .VInfof (ctx , 2 , "attempting to shed replicas next" )
239- rs .rebalanceReplicas (ctx , store , ss , localStoreID , now )
242+ rs .rebalanceReplicas (ctx , store , ss , localStoreID )
240243}
241244
242245func (rs * rebalanceState ) rebalanceReplicas (
243246 ctx context.Context ,
244247 store sheddingStore ,
245248 ss * storeState ,
246249 localStoreID roachpb.StoreID ,
247- now time.Time ,
248250) {
249251 doneShedding := false
250252 if store .StoreID != localStoreID && store .dimSummary [CPURate ] >= overloadSlow &&
251- now .Sub (ss .overloadStartTime ) < remoteStoreLeaseSheddingGraceDuration {
253+ rs .now .Sub (ss .overloadStartTime ) < remoteStoreLeaseSheddingGraceDuration {
252254 log .KvDistribution .VInfof (ctx , 2 , "skipping remote store s%d: in lease shedding grace period" , store .StoreID )
253255 return
254256 }
@@ -282,7 +284,7 @@ func (rs *rebalanceState) rebalanceReplicas(
282284 log .KvDistribution .VInfof (ctx , 2 , "skipping r%d: has pending changes" , rangeID )
283285 continue
284286 }
285- if now .Sub (rstate .lastFailedChange ) < rs .lastFailedChangeDelayDuration {
287+ if rs .now .Sub (rstate .lastFailedChange ) < rs .lastFailedChangeDelayDuration {
286288 log .KvDistribution .VInfof (ctx , 2 , "skipping r%d: too soon after failed change" , rangeID )
287289 continue
288290 }
@@ -373,7 +375,7 @@ func (rs *rebalanceState) rebalanceReplicas(
373375 // simple but effective manner. For now, we capture this using these
374376 // grace duration thresholds.
375377 ignoreLevel := ignoreLoadNoChangeAndHigher
376- overloadDur := now .Sub (ss .overloadStartTime )
378+ overloadDur := rs .now .Sub (ss .overloadStartTime )
377379 if overloadDur > ignoreHigherThanLoadThresholdGraceDuration {
378380 ignoreLevel = ignoreHigherThanLoadThreshold
379381 log .KvDistribution .VInfof (ctx , 3 , "using level %v (threshold:%v) for r%d based on overload duration %v" ,
@@ -453,7 +455,6 @@ func (rs *rebalanceState) rebalanceLeases(
453455 store sheddingStore ,
454456 ctx context.Context ,
455457 localStoreID roachpb.StoreID ,
456- now time.Time ,
457458) bool {
458459 log .KvDistribution .VInfof (ctx , 2 , "local store s%d is CPU overloaded (%v >= %v), attempting lease transfers first" ,
459460 store .StoreID , store .dimSummary [CPURate ], overloadSlow )
@@ -489,7 +490,7 @@ func (rs *rebalanceState) rebalanceLeases(
489490 " changes but is not leaseholder: %+v" , rstate )
490491 }
491492 }
492- if now .Sub (rstate .lastFailedChange ) < rs .lastFailedChangeDelayDuration {
493+ if rs .now .Sub (rstate .lastFailedChange ) < rs .lastFailedChangeDelayDuration {
493494 log .KvDistribution .VInfof (ctx , 2 , "skipping r%d: too soon after failed change" , rangeID )
494495 continue
495496 }
0 commit comments