@@ -772,10 +772,9 @@ type storeState struct {
772772 storeLoad
773773 StoreAttributesAndLocality
774774 adjusted struct {
775- // TODO: these adjusted load values can become negative due to applying
776- // pending changes. We need to let them be negative to retain the ability
777- // to undo pending changes. Audit the mean computation and rebalancing
778- // code to ensure that we bump up to a lower bound of zero.
775+ // NB: these load values can become negative due to applying pending
776+ // changes. We need to let them be negative to retain the ability to undo
777+ // pending changes.
779778 load LoadVector
780779 secondaryLoad SecondaryLoadVector
781780 // Pending changes for computing loadReplicas and load.
@@ -977,6 +976,7 @@ func newStoreState() *storeState {
977976type nodeState struct {
978977 stores []roachpb.StoreID
979978 NodeLoad
979+ // NB: adjustedCPU can be negative.
980980 adjustedCPU LoadValue
981981}
982982
@@ -1780,10 +1780,14 @@ func (cs *clusterState) processStoreLeaseholderMsgInternal(
17801780 // will start shedding replicas, so this is just a heuristic.
17811781 fraction = minLeaseLoadFraction
17821782 }
1783- threshold := LoadValue (float64 (ss .adjusted .load [topk .dim ]) * fraction )
1783+ // The max(0, ...) is defensive, in case the adjusted load is negative.
1784+ // Given that this is a most overloaded dim, the likelihood of the
1785+ // adjusted load being negative is very low.
1786+ adjustedStoreLoadValue := max (0 , ss .adjusted .load [topk .dim ])
1787+ threshold := LoadValue (float64 (adjustedStoreLoadValue ) * fraction )
17841788 if ss .reportedSecondaryLoad [ReplicaCount ] > 0 {
17851789 // Allow all ranges above 90% of the mean. This is quite arbitrary.
1786- meanLoad := (ss . adjusted . load [ topk . dim ] * 9 ) / (ss .reportedSecondaryLoad [ReplicaCount ] * 10 )
1790+ meanLoad := (adjustedStoreLoadValue * 9 ) / (ss .reportedSecondaryLoad [ReplicaCount ] * 10 )
17871791 threshold = min (meanLoad , threshold )
17881792 }
17891793 topk .threshold = threshold
@@ -2209,11 +2213,6 @@ func (cs *clusterState) undoReplicaChange(change ReplicaChange) {
22092213 cs .undoChangeLoadDelta (change )
22102214}
22112215
2212- // TODO(kvoli,sumeerbhola): The load of the store and node can become negative
2213- // when applying or undoing load adjustments. For load adjustments to be
2214- // reversible quickly, we aren't able to zero out the value when negative. We
2215- // should handle the negative values when using them.
2216-
22172216// applyChangeLoadDelta adds the change load delta to the adjusted load of the
22182217// store and node affected.
22192218func (cs * clusterState ) applyChangeLoadDelta (change ReplicaChange ) {
@@ -2416,6 +2415,11 @@ func (cs *clusterState) canShedAndAddLoad(
24162415 if targetSS .adjusted .load [overloadedDim ] > 0 {
24172416 overloadedDimFractionIncrease = float64 (deltaToAdd [overloadedDim ]) /
24182417 float64 (targetSS .adjusted .load [overloadedDim ])
2418+ } else {
2419+ // Else, the adjusted load on the overloadedDim is zero or negative, which
2420+ // is possible, but extremely rare in practice. We arbitrarily set the
2421+ // fraction increase to 1.0 in this case.
2422+ overloadedDimFractionIncrease = 1.0
24192423 }
24202424 otherDimensionsBecameWorseInTarget := false
24212425 for i := range targetSLS .dimSummary {
@@ -2428,16 +2432,18 @@ func (cs *clusterState) canShedAndAddLoad(
24282432 }
24292433 // This is an overloaded dimension in the target. Only allow small
24302434 // increases along this dimension.
2431- dimFractionIncrease := math .MaxFloat64
24322435 if targetSS .adjusted .load [dim ] > 0 {
2433- dimFractionIncrease = float64 (deltaToAdd [dim ]) / float64 (targetSS .adjusted .load [dim ])
2434- }
2435- // The use of 33% is arbitrary.
2436- if dimFractionIncrease > overloadedDimFractionIncrease / 3 {
2437- log . KvDistribution . Infof ( ctx , "%v: %f > %f/3" , dim , dimFractionIncrease , overloadedDimFractionIncrease )
2438- otherDimensionsBecameWorseInTarget = true
2439- break
2436+ dimFractionIncrease : = float64 (deltaToAdd [dim ]) / float64 (targetSS .adjusted .load [dim ])
2437+ // The use of 33% is arbitrary.
2438+ if dimFractionIncrease > overloadedDimFractionIncrease / 3 {
2439+ log . KvDistribution . Infof ( ctx , "%v: %f > %f/3" , dim , dimFractionIncrease , overloadedDimFractionIncrease )
2440+ otherDimensionsBecameWorseInTarget = true
2441+ break
2442+ }
24402443 }
2444+ // Else the adjusted load in dimension dim is zero or negative, which is
2445+ // possible, but extremely rare in practice. We ignore this dimension in
2446+ // that case.
24412447 }
24422448 canAddLoad = overloadedDimPermitsChange && ! otherDimensionsBecameWorseInTarget &&
24432449 targetSLS .maxFractionPendingIncrease < epsilon &&
@@ -2527,7 +2533,6 @@ func computeLoadSummary(
25272533 var dimSummary [NumLoadDimensions ]loadSummary
25282534 var worstDim LoadDimension
25292535 for i := range msl .load {
2530- // TODO(kvoli,sumeerbhola): Handle negative adjusted store/node loads.
25312536 const nodeIDForLogging = 0
25322537 ls := loadSummaryForDimension (ctx , ss .StoreID , nodeIDForLogging , LoadDimension (i ), ss .adjusted .load [i ], ss .capacity [i ],
25332538 msl .load [i ], msl .util [i ])
0 commit comments