|
| 1 | +// Copyright 2025 The Cockroach Authors. |
| 2 | +// |
| 3 | +// Use of this software is governed by the CockroachDB Software License |
| 4 | +// included in the /LICENSE file. |
| 5 | + |
| 6 | +package mmaprototype |
| 7 | + |
| 8 | +import ( |
| 9 | + "context" |
| 10 | + |
| 11 | + "github.com/cockroachdb/cockroach/pkg/roachpb" |
| 12 | + "github.com/cockroachdb/cockroach/pkg/util/log" |
| 13 | +) |
| 14 | + |
| 15 | +// MMARebalanceAdvisor contains information that mma needs to determine if a |
| 16 | +// candidate is in conflict with its goals. All fields should be immutable after |
| 17 | +// its initialization. |
| 18 | +// |
| 19 | +// MMARebalanceAdvisor uses the meansLoad summary to compute the load summary |
| 20 | +// for a provided candidate. Then it compares the candidate load summary against |
| 21 | +// the existingStoreSLS to determine if the candidate is more overloaded than |
| 22 | +// the existing store. If yes, mma will return true for IsInConflictWithMMA. It |
| 23 | +// is up to the caller to decide what to do with this information. |
| 24 | +type MMARebalanceAdvisor struct { |
| 25 | + // disabled is true when MMA is disabled. It overrides all decisions with |
| 26 | + // IsInConflictWithMMA returning false. |
| 27 | + disabled bool |
| 28 | + // existingStoreID is the ID of the existing store. |
| 29 | + existingStoreID roachpb.StoreID |
| 30 | + // existingStoreSLS holds the load summary for the existing store. It is |
| 31 | + // initially nil and is computed using existingStoreID and means the first |
| 32 | + // time IsInConflictWithMMA is called. The caller must ensure this advisor is |
| 33 | + // only used with the corresponding existingStoreID. |
| 34 | + existingStoreSLS *storeLoadSummary |
| 35 | + // means is the means for the candidate set. |
| 36 | + means meansLoad |
| 37 | +} |
| 38 | + |
| 39 | +// NoopMMARebalanceAdvisor is a no-op MMARebalanceAdvisor that always returns |
| 40 | +// false for IsInConflictWithMMA. Used when MMA is disabled or mma does not have |
| 41 | +// enough information to determine. |
| 42 | +func NoopMMARebalanceAdvisor() *MMARebalanceAdvisor { |
| 43 | + return &MMARebalanceAdvisor{ |
| 44 | + disabled: true, |
| 45 | + } |
| 46 | +} |
| 47 | + |
| 48 | +// BuildMMARebalanceAdvisor constructs an MMARebalanceAdvisor for the given |
| 49 | +// existing store and candidate stores. The advisor can be used to determine if |
| 50 | +// a candidate is in conflict with the existing store via IsInConflictWithMMA. |
| 51 | +// The provided cands list may or may not include the existing store. This |
| 52 | +// method always adds the existing store to the cands list so that it is |
| 53 | +// included in the mean calculation. It is up to computeMeansForStoreSet to |
| 54 | +// handle the de-duplication of storeIDs from the cands list. |
| 55 | +func (a *allocatorState) BuildMMARebalanceAdvisor( |
| 56 | + existing roachpb.StoreID, cands []roachpb.StoreID, |
| 57 | +) *MMARebalanceAdvisor { |
| 58 | + // TODO(wenyihu6): for simplicity, we create a new scratchNodes every call. |
| 59 | + // We should reuse the scratchNodes instead. |
| 60 | + scratchNodes := map[roachpb.NodeID]*NodeLoad{} |
| 61 | + scratchStores := map[roachpb.StoreID]struct{}{} |
| 62 | + cands = append(cands, existing) |
| 63 | + means := computeMeansForStoreSet(a.cs, cands, scratchNodes, scratchStores) |
| 64 | + return &MMARebalanceAdvisor{ |
| 65 | + existingStoreID: existing, |
| 66 | + means: means, |
| 67 | + } |
| 68 | +} |
| 69 | + |
| 70 | +// IsInConflictWithMMA determines if the given candidate is in conflict with the |
| 71 | +// existing store using the provided MMARebalanceAdvisor. For simplicity, we |
| 72 | +// currently say that this is in conflict if the candidate is more overloaded |
| 73 | +// than the existing store. This is subject to change in the future. Caller is |
| 74 | +// responsible for making sure the MMARebalanceAdvisor is for the correct |
| 75 | +// existing store and candidate set. |
| 76 | +func (a *allocatorState) IsInConflictWithMMA( |
| 77 | + ctx context.Context, cand roachpb.StoreID, advisor *MMARebalanceAdvisor, cpuOnly bool, |
| 78 | +) bool { |
| 79 | + if advisor.disabled { |
| 80 | + return false |
| 81 | + } |
| 82 | + // Lazily compute and cache the load summary for the existing store. |
| 83 | + if advisor.existingStoreSLS == nil { |
| 84 | + summary := a.cs.computeLoadSummary(ctx, advisor.existingStoreID, &advisor.means.storeLoad, &advisor.means.nodeLoad) |
| 85 | + advisor.existingStoreSLS = &summary |
| 86 | + } |
| 87 | + existingSLS := advisor.existingStoreSLS |
| 88 | + // Always compute the candidate's load summary. |
| 89 | + candSLS := a.cs.computeLoadSummary(ctx, cand, &advisor.means.storeLoad, &advisor.means.nodeLoad) |
| 90 | + |
| 91 | + var conflict bool |
| 92 | + if cpuOnly { |
| 93 | + conflict = candSLS.dimSummary[CPURate] > existingSLS.dimSummary[CPURate] |
| 94 | + if conflict { |
| 95 | + log.KvDistribution.VEventf( |
| 96 | + ctx, 2, |
| 97 | + "mma rejected candidate s%d(cpu-only) as a replacement for s%d: candidate=%v > existing=%v", |
| 98 | + cand, advisor.existingStoreID, candSLS.dimSummary[CPURate], existingSLS.dimSummary[CPURate], |
| 99 | + ) |
| 100 | + } |
| 101 | + } else { |
| 102 | + conflict = candSLS.sls > existingSLS.sls |
| 103 | + if conflict { |
| 104 | + log.KvDistribution.VEventf( |
| 105 | + ctx, 2, |
| 106 | + "mma rejected candidate s%d as a replacement for s%d: candidate=%v > existing=%v", |
| 107 | + cand, advisor.existingStoreID, candSLS.sls, existingSLS.sls, |
| 108 | + ) |
| 109 | + } |
| 110 | + } |
| 111 | + return conflict |
| 112 | +} |
0 commit comments