allocatorimpl: improve comments for recent changes

wenyihu6 · wenyihu6 · commit 583ca6691760 · 2025-10-06T21:45:20.000-04:00
This commit updates comments to reflect changes made in previous commits.
diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
@@ -1873,24 +1873,44 @@ func (a Allocator) RebalanceTarget(
 	var target, existingCandidate *candidate
 	var removeReplica roachpb.ReplicationTarget
 
-	// When bestRebalanceTarget selects an option (bestIdx) for the first time, it
-	// builds and caches the corresponding MMA advisor in advisors[bestIdx]. The
-	// advisor stores the means and other information MMA needs to determine if a
-	// candidate conflicts with its goals for this option.  After this,
-	// bestRebalanceTarget is free to mutate the cands set of the 	// option
-	// incrementally. If bestRebalanceTarget selects the same option again, it
-	// will reuse the cached advisor to call into IsInConflictWithMMA.
+	// The loop below can iterate multiple times. This is because the
+	// (source,target) pair chosen by bestRebalanceTarget may be rejected either
+	// by the multi-metric allocator or by the check that a moved replica wouldn't
+	// immediately be removable. bestRebalanceTarget mutates the candidate slice
+	// for the given option (=source) to exclude each considered candidate. For
+	// example, the loop may proceed as follows:
+	// - initially, we may consider rebalancing from s1 to either of s2, s3, or
+	// s5, or rebalancing from s6 to either of s2 or s4: options = [s1 ->
+	// [s2,s3,s5], s6 -> [s2,s4]]. Each option here is considered as an
+	// equivalence class.
+	// - bestRebalanceTarget might pick s6->s4, and removes this choice from
+	// `options`. options now becomes [s1->[s2,s3,s5], s6 -> [s2]].
+	// - mma might reject s6->s4, so we loop around.
+	// - next, s1->s3 might be chosen, but fail the removable replica check.
+	// - so we'll begin a third loop with: options is now [s1->[s2,s5], s6 ->
+	// [s2]].
+	// - s6->s2 might be chosen and might succeed, terminating the loop and
+	// proceeding to make the change.
 	//
-	// For example, the option may start with {s1, s2, s3} as its candidate set,
-	// if bestRebalanceTarget selects s1, it will remove s1 and continue with {s2,
-	// s3}, then say it continues to select s2 and remove s2 and continue with
-	// {s3}, and finally {s3} → {}. Each call to MMA should use the original set
-	// {s1, s2, s3} ∪ {existing} to compute the means. The design here allows MMA
-	// to compute the means for the original set {s1, s2, s3} ∪ {existing} once
-	// and then use it for all calls to MMA, and bestRebalanceTarget does not need
-	// to copy the candidate set.
+	// Note that in general (and in the example) a source store can be considered
+	// multiple times (s6 is considered twice), so we cache the corresponding MMA
+	// advisor to avoid potentially expensive O(store) recomputations. The
+	// corresponding advisor is constructed only once and cached in
+	// results[bestIdx].advisor when the source store is selected as the best
+	// rebalance target for the first time. After that, bestRebalanceTarget is
+	// free to mutate the cands set of the option. However, MMARebalanceAdvisor
+	// should use the original candidate set union the existing store to compute
+	// the load summary when calling IsInConflictWithMMA. It does so by using the
+	// computed meansLoad summary cached when this option was selected as the best
+	// rebalance target for the first time.
 	var bestIdx int
 
+	// NB: bestRebalanceTarget may modify the candidate set (cands) within each
+	// option in results. However, for each option, the associated source store,
+	// MMARebalanceAdvisor, and their index in results must remain unchanged
+	// throughout the process. This ensures that any cached MMARebalanceAdvisor
+	// continues to correspond to the original candidate set and source store,
+	// even as candidates are removed.
 	for {
 		target, existingCandidate, bestIdx = bestRebalanceTarget(a.randGen, results, a.as)
 		if target == nil {
@@ -1905,11 +1925,11 @@ func (a Allocator) RebalanceTarget(
 		// improvements.
 		if !existingCandidate.isCriticalRebalance(target) {
 			// If the rebalance is not critical, we check if it conflicts with mma's
-			// goal. advisor for the target should always be registered in
-			// bestRebalanceTarget and present in the map. If mma rejects the
-			// rebalance, we will continue to the next target. Note that this target
-			// would have been deleted from the candidates set in bestRebalanceTarget,
-			// so we will not select it again.
+			// goal. The advisor for bestIdx should always be cached by
+			// bestRebalanceTarget. If mma rejects the rebalance, we continue to the
+			// next target. Note that bestRebalanceTarget removes a target from the
+			// candidate set as soon as it selects it, so this target will not be
+			// selected again.
 			if advisor := results[bestIdx].advisor; advisor != nil {
 				if a.as.IsInConflictWithMMA(ctx, target.store.StoreID, advisor, false) {
 					continue
@@ -2803,7 +2823,8 @@ func (a *Allocator) CountBasedRebalancingDisabled() bool {
 // change. This is used to prevent thrashing when both multi-metric and
 // count-based rebalancing are enabled and have conflicting goals.
 // TODO(wenyihu6): since we sometimes see even worse thrashing behaviour with
-// this change, should we introduce another mode for this
+// this change, should we introduce two modes
+// (mma-count with+without thrashing prevention)?
 func (a *Allocator) CountBasedRebalancingOnlyEnabledByMMA() bool {
 	return kvserverbase.LoadBasedRebalancingMode.Get(&a.st.SV) == kvserverbase.LBRebalancingMultiMetricAndCount
 }
diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go
@@ -1420,7 +1420,11 @@ func candidateListForRemoval(
 type rebalanceOptions struct {
 	existing   candidate
 	candidates candidateList
-	advisor    *mmaprototype.MMARebalanceAdvisor
+	// advisor is lazily initialized by bestRebalanceTarget when this option is
+	// selected as best rebalance target. It is used to determine if a candidate
+	// is in conflict with mma's goals when LBRebalancingMultiMetricAndCount mode
+	// is enabled.
+	advisor *mmaprototype.MMARebalanceAdvisor
 }
 
 // equivalenceClass captures the set of "equivalent" replacement candidates
diff --git a/pkg/kv/kvserver/allocator/mmaprototype/allocator_state.go b/pkg/kv/kvserver/allocator/mmaprototype/allocator_state.go
@@ -1297,13 +1297,23 @@ func (a *allocatorState) ensureAnalyzedConstraints(rstate *rangeState) bool {
 
 // MMARebalanceAdvisor contains information that mma needs to determine if a
 // candidate is in conflict with its goals. All fields should be immutable after
-// creation.
+// its initialization.
+//
+// MMARebalanceAdvisor uses the meansLoad summary to compute the load summary
+// for a provided candidate. Then it compares the candidate load summary against
+// the existingStoreSLS to determine if the candidate is more overloaded than
+// the existing store. If yes, mma will return true for IsInConflictWithMMA. It
+// is up to the caller to decide what to do with this information.
 type MMARebalanceAdvisor struct {
 	// disabled is true when MMA is disabled. It overrides all decisions with
 	// IsInConflictWithMMA returning false.
-	disabled        bool
+	disabled bool
+	// existingStoreID is the ID of the existing store.
 	existingStoreID roachpb.StoreID
-	// existingStoreSLS is the load summary for the existing store.
+	// existingStoreSLS holds the load summary for the existing store. It is
+	// initially nil and is lazily computed from existingStoreID and the means
+	// field the first time IsInConflictWithMMA is called. The caller must only
+	// use this advisor with the corresponding existingStoreID.
 	existingStoreSLS *storeLoadSummary
 	// means is the means for the candidate set.
 	means meansLoad
@@ -1318,11 +1328,13 @@ func NoopMMARebalanceAdvisor() *MMARebalanceAdvisor {
 	}
 }
 
-// BuildMMARebalanceAdvisor creates a MMARebalanceAdvisor for the given existing
-// store and candidates. The advisor is returned here used to determine if a
-// given candidate is in conflict with the existing store via
-// IsInConflictWithMMA. The candidate set here may or may not include the
-// existing store. mma should include the existing store in the candidate set.
+// BuildMMARebalanceAdvisor constructs an MMARebalanceAdvisor for the given
+// existing store and candidate stores. The advisor can be used to determine if
+// a candidate is in conflict with the existing store via IsInConflictWithMMA.
+// The provided cands list may or may not include the existing store. This
+// method always adds the existing store to the cands list so that it is
+// included in the mean calculation. It is up to computeMeansForStoreSet to
+// handle the de-duplication of storeIDs from the cands list.
 func (a *allocatorState) BuildMMARebalanceAdvisor(
 	existing roachpb.StoreID, cands []roachpb.StoreID,
 ) *MMARebalanceAdvisor {
@@ -1339,9 +1351,11 @@ func (a *allocatorState) BuildMMARebalanceAdvisor(
 }
 
 // IsInConflictWithMMA determines if the given candidate is in conflict with the
-// existing store using the provided MMARebalanceAdvisor. Caller is responsible
-// for making sure the MMARebalanceAdvisor is for the correct existing store and
-// candidate set.
+// existing store using the provided MMARebalanceAdvisor. For simplicity, we
+// currently say that this is in conflict if the candidate is more overloaded
+// than the existing store. This is subject to change in the future. Caller is
+// responsible for making sure the MMARebalanceAdvisor is for the correct
+// existing store and candidate set.
 func (a *allocatorState) IsInConflictWithMMA(
 	ctx context.Context, cand roachpb.StoreID, advisor *MMARebalanceAdvisor, cpuOnly bool,
 ) bool {
@@ -1363,7 +1377,7 @@ func (a *allocatorState) IsInConflictWithMMA(
 		if conflict {
 			log.KvDistribution.VEventf(
 				ctx, 2,
-				"mma rejected candidate s%d (cpu-only) as a replacement for s%d: candidate=%v > existing=%v",
+				"mma rejected candidate s%d(cpu-only) as a replacement for s%d: candidate=%v > existing=%v",
 				cand, advisor.existingStoreID, candSLS.dimSummary[CPURate], existingSLS.dimSummary[CPURate],
 			)
 		}
diff --git a/pkg/kv/kvserver/mmaintegration/thrashing.go b/pkg/kv/kvserver/mmaintegration/thrashing.go
@@ -42,22 +42,23 @@ import (
 // Two main design questions came up.
 // 1. The first was when to exclude overloaded stores (early before mean
 // calculation or late only at target selection).
-// - We decided to include them in the mean calculation but exclude them at the
+// • We decided to include them in the mean calculation but exclude them at the
 // final target selection step. This minimizes code churn, avoids plumbing new
 // fields into candidate structs, and reduces number of mma calls by checking
 // only the final target instead of on every candidate. It does not eliminate
 // thrashing, since a store may look like a good candidate during scoring but be
 // rejected later, picking a not-so-good but still better than existing
 // candidate. The lease queue follows the same rule, filtering overloaded stores
 // only at final target selection.
-// - Alternatives considered: 1. mma participates in the allocator's scoring
+// • Alternatives considered: 1. mma participates in the allocator's scoring
 // options either by jittering balance score or by introducing a new field in
 // the candidate struct. 2. exclude the store right before or right after the
 // equivalence class construction.
 //
-// 2. The second question was which set of stores to use when computing load
-// summaries with respect to.
-// - The principle we followed is to use the same set of stores that is used to
+// 2. The second question was which set of stores to use as the basis when MMA
+// computes a store's load summary, since a load summary is always computed with
+// respect to some set of stores.
+// • The principle we followed is to use the same set of stores that is used to
 // compute the mean for range or lease count. For the replicate queue, this
 // means we use all stores that satisfy constraints to compute mean. The
 // principle we are following here is that we want this set or the mean to be
@@ -66,7 +67,9 @@ import (
 // constraint-satisfying stores. For the lease queue, this means we use all
 // stores that satisfy the constraint to compute the lease count mean as well.
 // This approach differs from how mma computes load summary for lease transfers
-// - mma computes load summary over stores that the existing replicas are on.
+// (mma computes load summary over stores that the existing replicas are on).
+// • Note that MMARebalanceAdvisor also always includes the existing store in
+// the set of stores to compute the load summary with respect to.
 //
 // Alternatives considered:
 // 1. Another option was to let mma choose from a set of candidates, but this was
@@ -88,10 +91,11 @@ import (
 // for IsInConflictWithMMA. If MMA is enabled, the advisor is created by
 // computing the load summary for the provided existing store and candidate set.
 //
-// Note that MMA continues to use this candidate set to compute load summaries,
-// so it is safe for the caller to modify the candidate set after calling this
-// function. The caller is responsible for keeping track of the returned advisor
-// and associating it with the corresponding existing store.
+// Note that MMARebalanceAdvisor should always use the means summary constructed
+// during BuildMMARebalanceAdvisor to compute the load summary for the provided
+// candidate in IsInConflictWithMMA. The caller may modify its candidate set
+// after calling this function, so the caller is responsible for tracking the
+// returned advisor and associating it with the corresponding existing store.
 func (as *AllocatorSync) BuildMMARebalanceAdvisor(
 	existing roachpb.StoreID, cands []roachpb.StoreID,
 ) *mmaprototype.MMARebalanceAdvisor {