@@ -1873,24 +1873,44 @@ func (a Allocator) RebalanceTarget(
1873
1873
var target , existingCandidate * candidate
1874
1874
var removeReplica roachpb.ReplicationTarget
1875
1875
1876
- // When bestRebalanceTarget selects an option (bestIdx) for the first time, it
1877
- // builds and caches the corresponding MMA advisor in advisors[bestIdx]. The
1878
- // advisor stores the means and other information MMA needs to determine if a
1879
- // candidate conflicts with its goals for this option. After this,
1880
- // bestRebalanceTarget is free to mutate the cands set of the // option
1881
- // incrementally. If bestRebalanceTarget selects the same option again, it
1882
- // will reuse the cached advisor to call into IsInConflictWithMMA.
1876
+ // The loop below can iterate multiple times. This is because the
1877
+ // (source,target) pair chosen by bestRebalanceTarget may be rejected either
1878
+ // by the multi-metric allocator or by the check that a moved replica wouldn't
1879
+ // immediately be removable. bestRebalanceTarget mutates the candidate slice
1880
+ // for the given option (=source) to exclude each considered candidate. For
1881
+ // example, the loop may proceed as follows:
1882
+ // - initially, we may consider rebalancing from s1 to either of s2, s3, or
1883
+ // s5, or rebalancing from s6 to either of s2 or s4: options = [s1 ->
1884
+ // [s2,s3,s5], s6 -> [s2,s4]]. Each option here is considered as an
1885
+ // equivalence class.
1886
+ // - bestRebalanceTarget might pick s6->s4, and removes this choice from
1887
+ // `options`. options now becomes [s1->[s2,s3,s5], s6 -> [s2]].
1888
+ // - mma might reject s6->s4, so we loop around.
1889
+ // - next, s1->s3 might be chosen, but fail the removable replica check.
1890
+ // - so we'll begin a third loop with: options is now [s1->[s2,s5], s6 ->
1891
+ // [s2]].
1892
+ // - s6->s2 might be chosen and might succeed, terminating the loop and
1893
+ // proceeding to make the change.
1883
1894
//
1884
- // For example, the option may start with {s1, s2, s3} as its candidate set,
1885
- // if bestRebalanceTarget selects s1, it will remove s1 and continue with {s2,
1886
- // s3}, then say it continues to select s2 and remove s2 and continue with
1887
- // {s3}, and finally {s3} → {}. Each call to MMA should use the original set
1888
- // {s1, s2, s3} ∪ {existing} to compute the means. The design here allows MMA
1889
- // to compute the means for the original set {s1, s2, s3} ∪ {existing} once
1890
- // and then use it for all calls to MMA, and bestRebalanceTarget does not need
1891
- // to copy the candidate set.
1895
+ // Note that in general (and in the example) a source store can be considered
1896
+ // multiple times (s6 is considered twice), so we cache the corresponding MMA
1897
+ // advisor to avoid potentially expensive O(store) recomputations. The
1898
+ // corresponding advisor is constructed only once and cached in
1899
+ // results[bestIdx].advisor when the source store is selected as the best
1900
+ // rebalance target for the first time. After that, bestRebalanceTarget is
1901
+ // free to mutate the cands set of the option. However, MMARebalanceAdvisor
1902
+ // should use the original candidate set union the existing store to compute
1903
+ // the load summary when calling IsInConflictWithMMA. It does so by using the
1904
+ // computed meansLoad summary cached when this option was selected as the best
1905
+ // rebalance target for the first time.
1892
1906
var bestIdx int
1893
1907
1908
+ // NB: bestRebalanceTarget may modify the candidate set (cands) within each
1909
+ // option in results. However, for each option, the associated source store,
1910
+ // MMARebalanceAdvisor, and their index in results must remain unchanged
1911
+ // throughout the process. This ensures that any cached MMARebalanceAdvisor
1912
+ // continues to correspond to the original candidate set and source store,
1913
+ // even as candidates are removed.
1894
1914
for {
1895
1915
target , existingCandidate , bestIdx = bestRebalanceTarget (a .randGen , results , a .as )
1896
1916
if target == nil {
@@ -1905,11 +1925,11 @@ func (a Allocator) RebalanceTarget(
1905
1925
// improvements.
1906
1926
if ! existingCandidate .isCriticalRebalance (target ) {
1907
1927
// If the rebalance is not critical, we check if it conflicts with mma's
1908
- // goal. advisor for the target should always be registered in
1909
- // bestRebalanceTarget and present in the map. If mma rejects the
1910
- // rebalance, we will continue to the next target. Note that this target
1911
- // would have been deleted from the candidates set in bestRebalanceTarget,
1912
- // so we will not select it again.
1928
+ // goal. advisor for bestIdx should always be cached by
1929
+ // bestRebalanceTarget. If mma rejects the rebalance, we will continue to
1930
+ // the next target. Note that bestRebalanceTarget deletes a target from the
1931
+ // candidates set when it selects it, so this target will not be selected
1932
+ // again.
1913
1933
if advisor := results [bestIdx ].advisor ; advisor != nil {
1914
1934
if a .as .IsInConflictWithMMA (ctx , target .store .StoreID , advisor , false ) {
1915
1935
continue
@@ -2803,7 +2823,8 @@ func (a *Allocator) CountBasedRebalancingDisabled() bool {
2803
2823
// change. This is used to prevent thrashing when both multi-metric and
2804
2824
// count-based rebalancing are enabled and have conflicting goals.
2805
2825
// TODO(wenyihu6): since we sometimes see even worse thrashing behaviour with
2806
- // this change, should we introduce another mode for this
2826
+ // this change, should we introduce two modes
2827
+ // (mma-count with+without thrashing prevention)?
2807
2828
func (a * Allocator ) CountBasedRebalancingOnlyEnabledByMMA () bool {
2808
2829
// True exactly when the LoadBasedRebalancingMode setting read from a.st.SV
// equals LBRebalancingMultiMetricAndCount; every other mode yields false.
return kvserverbase .LoadBasedRebalancingMode .Get (& a .st .SV ) == kvserverbase .LBRebalancingMultiMetricAndCount
2809
2830
}
0 commit comments