Commit e866982

mmaintegration: add StoreStatus to mma status translation plumbing
This commit adds the translation layer from StoreStatus to mma's (Health, Disposition) model.

sma currently relies on StorePool methods (GetStoreList, LiveAndDeadReplicas), which internally compute StoreStatus from NodeLivenessFunc (membership + health) combined with other signals (throttling, suspect state). To preserve sma's behavior, mma reuses StorePool's status() method and translates the result into its own (Health, Disposition) model rather than re-deriving health independently.

Alternatives considered (and rejected):

1. Query NodeLiveness directly in mma: NodeLiveness operates at the node level, while StorePool tracks per-store state, so store statuses on the same node can diverge based on gossip timing and store-specific signals. In addition, NodeLiveness does not include store-level signals such as throttling (snapshot backpressure) and suspect status (recently unavailable), which sma currently uses to filter candidates when making lease/replica placement decisions.

2. Periodically poll the store pool from a separate mma goroutine: this is more complex, can serve stale statuses, and is less efficient. mma currently only needs updated health statuses for ComputeChanges, so statuses are instead plumbed in right before ComputeChanges().

Note that the translation goes through allocator sync, not directly in mmaprototype, to avoid importing storepool there and keep the layering clean.
1 parent f3f12a6 commit e866982
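For orientation, here is a minimal sketch of the kind of mapping this translation performs, read off the scenarios exercised in the mmaprototype test data further down this page. The type and constant names below are hypothetical stand-ins, not the real identifiers in storepool, mmaintegration, or mmaprototype; the authoritative mapping lives in the new store_status.go and may differ in detail.

package sketch

import "fmt"

// Hypothetical stand-ins for storepool.StoreStatus and mma's (Health,
// Disposition) model; the real identifiers differ.
type storePoolStatus int

const (
    statusAvailable storePoolStatus = iota
    statusThrottled
    statusSuspect
    statusDraining
    statusDecommissioning
    statusUnknown
    statusDead
)

type mmaStatus struct {
    Health   string // "ok", "unhealthy", "unknown", "dead"
    Leases   string // lease disposition: "ok", "refusing", "shedding"
    Replicas string // replica disposition: "ok", "refusing", "shedding"
}

// translate mirrors the scenarios in the mmaprototype datadriven test below
// (set-store-status health=... leases=... replicas=...).
func translate(s storePoolStatus) mmaStatus {
    switch s {
    case statusDead:
        return mmaStatus{Health: "dead", Leases: "shedding", Replicas: "shedding"}
    case statusUnknown:
        return mmaStatus{Health: "unknown", Leases: "refusing", Replicas: "refusing"}
    case statusDecommissioning:
        return mmaStatus{Health: "ok", Leases: "shedding", Replicas: "shedding"}
    case statusDraining:
        return mmaStatus{Health: "ok", Leases: "shedding", Replicas: "refusing"}
    case statusSuspect:
        return mmaStatus{Health: "unhealthy", Leases: "shedding", Replicas: "refusing"}
    case statusThrottled:
        return mmaStatus{Health: "ok", Leases: "ok", Replicas: "refusing"}
    default: // statusAvailable
        return mmaStatus{Health: "ok", Leases: "ok", Replicas: "ok"}
    }
}

// Example usage: a throttled store keeps its health and lease disposition but
// refuses new replicas.
func Example() {
    fmt.Println(translate(statusThrottled)) // {ok ok refusing}
}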

File tree

12 files changed: +542 -1 lines

pkg/kv/kvserver/allocator/mmaprototype/allocator.go

Lines changed: 5 additions & 0 deletions
@@ -60,6 +60,11 @@ type Allocator interface {
 	// associated node in the cluster.
 	ProcessStoreLoadMsg(ctx context.Context, msg *StoreLoadMsg)
 
+	// UpdateStoreStatus updates the health and disposition of the stores according to the statuses in storeStatuses.
+	// Stores not known to the allocator are ignored with logging.
+	// TODO(wenyihu6): if this is too expensive, we should only update status for stores that have changed.
+	UpdateStoreStatus(ctx context.Context, storeStatuses map[roachpb.StoreID]Status)
+
 	// Methods related to making changes.
 
 	// AdjustPendingChangeDisposition is optional feedback to inform the
pkg/kv/kvserver/allocator/mmaprototype/allocator_state.go

Lines changed: 9 additions & 0 deletions
@@ -204,6 +204,15 @@ func (a *allocatorState) ProcessStoreLoadMsg(ctx context.Context, msg *StoreLoad
 	a.cs.processStoreLoadMsg(ctx, msg)
 }
 
+// UpdateStoreStatus implements the Allocator interface.
+func (a *allocatorState) UpdateStoreStatus(
+	ctx context.Context, storeStatuses map[roachpb.StoreID]Status,
+) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.cs.updateStoreStatuses(ctx, storeStatuses)
+}
+
 // AdjustPendingChangeDisposition implements the Allocator interface.
 func (a *allocatorState) AdjustPendingChangeDisposition(
 	ctx context.Context, change ExternalRangeChange, success bool,

pkg/kv/kvserver/allocator/mmaprototype/cluster_state.go

Lines changed: 17 additions & 0 deletions
@@ -2216,6 +2216,23 @@ func (cs *clusterState) setStore(sal storeAttributesAndLocalityWithNodeTier) {
 	}
 }
 
+// updateStoreStatuses updates each known store's health and disposition from storeStatuses.
+// Stores known to the store pool but not yet to mma are ignored with logging.
+func (cs *clusterState) updateStoreStatuses(
+	ctx context.Context, storeStatuses map[roachpb.StoreID]Status,
+) {
+	for storeID, storeStatus := range storeStatuses {
+		if _, ok := cs.stores[storeID]; !ok {
+			// Store not known to mma yet but known to the store pool - ignore the
+			// update. The store will be added via setStore when gossip arrives, and
+			// subsequent status updates will then take effect.
+			log.KvDistribution.Infof(ctx, "store %d not found in cluster state, skipping update", storeID)
+			continue
+		}
+		cs.stores[storeID].status = storeStatus
+	}
+}
+
 //======================================================================
 // clusterState accessors:
 //
Lines changed: 208 additions & 0 deletions
# This test verifies mma's candidate exclusion and shedding behavior based on
# store status. Note that this test does not verify the correctness of the
# actual translation from store pool status to mma status. That is tested in
# the mmaintegration package.
#
# Setup: 3 stores
# - s1: source store (overloaded, wants to shed)
# - s2: always available (good target)
# - s3: test store (status changes to test each scenario)
set-store
store-id=1 node-id=1
store-id=2 node-id=2
store-id=3 node-id=3
----
node-id=1 locality-tiers=node=1
store-id=1 attrs=
node-id=2 locality-tiers=node=2
store-id=2 attrs=
node-id=3 locality-tiers=node=3
store-id=3 attrs=

# s1 is overloaded, s2 and s3 are low load
store-load-msg
store-id=1 node-id=1 load=[1000,0,0] capacity=[1000,1000,1000] secondary-load=0 load-time=0s
store-id=2 node-id=2 load=[100,0,0] capacity=[1000,1000,1000] secondary-load=0 load-time=0s
store-id=3 node-id=3 load=[100,0,0] capacity=[1000,1000,1000] secondary-load=0 load-time=0s
----

# Range r1: lease on s1, replicas on s1, s2, s3
store-leaseholder-msg
store-id=1
range-id=1 load=[100,0,0] raft-cpu=100
store-id=1 replica-id=1 type=VOTER_FULL leaseholder=true
store-id=2 replica-id=2 type=VOTER_FULL
store-id=3 replica-id=3 type=VOTER_FULL
config=num_replicas=3 constraints={} voter_constraints={}
----

# Baseline: all stores available
retain-ready-replica-target-stores-only in=(2,3)
----
[2 3]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
[2 3]

# Dead: excluded from all targets (shedding leases, shedding replicas)
set-store-status store-id=3 health=dead leases=shedding replicas=shedding
----
dead shedding=leases,replicas

retain-ready-replica-target-stores-only in=(2,3)
----
skipping s3 for replica transfer: replica disposition shedding (health dead)
[2]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
skipping s3 for lease transfer: lease disposition shedding (health dead)
[2]

# Unknown: excluded from all targets (refusing leases, refusing replicas)
set-store-status store-id=3 health=unknown leases=refusing replicas=refusing
----
unknown refusing=leases,replicas

retain-ready-replica-target-stores-only in=(2,3)
----
skipping s3 for replica transfer: replica disposition refusing (health unknown)
[2]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
skipping s3 for lease transfer: lease disposition refusing (health unknown)
[2]

# Decommissioning: excluded from all targets (shedding leases, shedding replicas)
set-store-status store-id=3 health=ok leases=shedding replicas=shedding
----
ok shedding=leases,replicas

retain-ready-replica-target-stores-only in=(2,3)
----
skipping s3 for replica transfer: replica disposition shedding (health ok)
[2]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
skipping s3 for lease transfer: lease disposition shedding (health ok)
[2]

# Draining: excluded from all targets (shedding leases, refusing replicas)
set-store-status store-id=3 health=ok leases=shedding replicas=refusing
----
ok refusing=replicas shedding=leases

retain-ready-replica-target-stores-only in=(2,3)
----
skipping s3 for replica transfer: replica disposition refusing (health ok)
[2]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
skipping s3 for lease transfer: lease disposition shedding (health ok)
[2]

# Suspect: excluded from all targets (shedding leases, refusing replicas)
set-store-status store-id=3 health=unhealthy leases=shedding replicas=refusing
----
unhealthy refusing=replicas shedding=leases

retain-ready-replica-target-stores-only in=(2,3)
----
skipping s3 for replica transfer: replica disposition refusing (health unhealthy)
[2]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
skipping s3 for lease transfer: lease disposition shedding (health unhealthy)
[2]

# Throttled: excluded from replica targets, can receive leases
set-store-status store-id=3 health=ok leases=ok replicas=refusing
----
ok refusing=replicas

retain-ready-replica-target-stores-only in=(2,3)
----
skipping s3 for replica transfer: replica disposition refusing (health ok)
[2]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
[2 3]

# Available: accepts everything
set-store-status store-id=3 health=ok leases=ok replicas=ok
----
ok accepting all

retain-ready-replica-target-stores-only in=(2,3)
----
[2 3]

retain-ready-lease-target-stores-only in=(2,3) range-id=1
----
[2 3]

# Rebalance test: verify s3 (dead) is excluded during actual rebalance
set-store-status store-id=3 health=dead leases=shedding replicas=shedding
----
dead shedding=leases,replicas

rebalance-stores store-id=1
----
[mmaid=1] rebalanceStores begins
[mmaid=1] cluster means: (stores-load [cpu:400ns/s, write-bandwidth:0 B/s, byte-size:0 B]) (stores-capacity [cpu:1µs/s, write-bandwidth:1.0 kB/s, byte-size:1.0 kB]) (nodes-cpu-load 400) (nodes-cpu-capacity 1000)
[mmaid=1] load summary for dim=CPURate (s1): overloadUrgent, reason: fractionUsed > 90% [load=1000 meanLoad=400 fractionUsed=100.00% meanUtil=40.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s1): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s1): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n1): overloadUrgent, reason: fractionUsed > 90% [load=1000 meanLoad=400 fractionUsed=100.00% meanUtil=40.00% capacity=1000]
[mmaid=1] evaluating s1: node load overloadUrgent, store load overloadUrgent, worst dim CPURate
[mmaid=1] overload-continued s1 ((store=overloadUrgent worst=CPURate cpu=overloadUrgent writes=loadNormal bytes=loadNormal node=overloadUrgent high_disk=false frac_pending=0.00,0.00(true))) - within grace period
[mmaid=1] store s1 was added to shedding store list
[mmaid=1] load summary for dim=CPURate (s2): loadLow, reason: load is >10% below mean [load=100 meanLoad=400 fractionUsed=10.00% meanUtil=40.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s2): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s2): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n2): loadLow, reason: load is >10% below mean [load=100 meanLoad=400 fractionUsed=10.00% meanUtil=40.00% capacity=1000]
[mmaid=1] evaluating s2: node load loadLow, store load loadNormal, worst dim WriteBandwidth
[mmaid=1] load summary for dim=CPURate (s3): loadLow, reason: load is >10% below mean [load=100 meanLoad=400 fractionUsed=10.00% meanUtil=40.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s3): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s3): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n3): loadLow, reason: load is >10% below mean [load=100 meanLoad=400 fractionUsed=10.00% meanUtil=40.00% capacity=1000]
[mmaid=1] evaluating s3: node load loadLow, store load loadNormal, worst dim WriteBandwidth
[mmaid=1] start processing shedding store s1: cpu node load overloadUrgent, store load overloadUrgent, worst dim CPURate
[mmaid=1] top-K[CPURate] ranges for s1 with lease on local s1: r1:[cpu:100ns/s, write-bandwidth:0 B/s, byte-size:0 B]
[mmaid=1] local store s1 is CPU overloaded (overloadUrgent >= overloadSlow), attempting lease transfers first
[mmaid=1] considering lease-transfer r1 from s1: candidates are [2 3]
[mmaid=1] skipping s3 for lease transfer: lease disposition shedding (health dead)
[mmaid=1] load summary for dim=CPURate (s1): overloadUrgent, reason: fractionUsed > 90% [load=1000 meanLoad=550 fractionUsed=100.00% meanUtil=55.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s1): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s1): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n1): overloadUrgent, reason: fractionUsed > 90% [load=1000 meanLoad=550 fractionUsed=100.00% meanUtil=55.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (s2): loadLow, reason: load is >10% below mean [load=100 meanLoad=550 fractionUsed=10.00% meanUtil=55.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s2): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s2): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n2): loadLow, reason: load is >10% below mean [load=100 meanLoad=550 fractionUsed=10.00% meanUtil=55.00% capacity=1000]
[mmaid=1] sortTargetCandidateSetAndPick: candidates: s2(SLS:loadNormal, overloadedDimLoadSummary:loadLow), overloadedDim:CPURate, picked s2
[mmaid=1] load summary for dim=CPURate (s2): loadLow, reason: load is >10% below mean [load=100 meanLoad=550 fractionUsed=10.00% meanUtil=55.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s2): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s2): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n2): loadLow, reason: load is >10% below mean [load=100 meanLoad=550 fractionUsed=10.00% meanUtil=55.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (s1): overloadUrgent, reason: fractionUsed > 90% [load=1000 meanLoad=550 fractionUsed=100.00% meanUtil=55.00% capacity=1000]
[mmaid=1] load summary for dim=WriteBandwidth (s1): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=ByteSize (s1): loadNormal, reason: load is within 5% of mean [load=0 meanLoad=0 fractionUsed=0.00% meanUtil=0.00% capacity=1000]
[mmaid=1] load summary for dim=CPURate (n1): overloadUrgent, reason: fractionUsed > 90% [load=1000 meanLoad=550 fractionUsed=100.00% meanUtil=55.00% capacity=1000]
[mmaid=1] can add load to n2s2: true targetSLS[(store=loadNormal worst=WriteBandwidth cpu=loadLow writes=loadNormal bytes=loadNormal node=loadLow high_disk=false frac_pending=0.00,0.00(true))] srcSLS[(store=overloadUrgent worst=CPURate cpu=overloadUrgent writes=loadNormal bytes=loadNormal node=overloadUrgent high_disk=false frac_pending=0.00,0.00(true))]
[mmaid=1] result(success): shedding r1 lease from s1 to s2 [change:r1=[transfer_to=2 cids=1,2]] with resulting loads source:[cpu:1µs/s, write-bandwidth:0 B/s, byte-size:0 B] target:[cpu:100ns/s, write-bandwidth:0 B/s, byte-size:0 B] (means: [cpu:550ns/s, write-bandwidth:0 B/s, byte-size:0 B]) (frac_pending: (src:0.00,target:0.00) (src:0.00,target:0.00))
[mmaid=1] skipping replica transfers for s1 to try more leases next time
[mmaid=1] rebalancing pass shed: {s1}
pending(2)
change-id=1 store-id=1 node-id=1 range-id=1 load-delta=[cpu:0s/s, write-bandwidth:0 B/s, byte-size:0 B] start=0s gc=1m0s
prev=(replica-id=1 type=VOTER_FULL leaseholder=true)
next=(replica-id=1 type=VOTER_FULL)
change-id=2 store-id=2 node-id=2 range-id=1 load-delta=[cpu:0s/s, write-bandwidth:0 B/s, byte-size:0 B] start=0s gc=1m0s
prev=(replica-id=2 type=VOTER_FULL)
next=(replica-id=2 type=VOTER_FULL leaseholder=true)

pkg/kv/kvserver/allocator/storepool/store_pool.go

Lines changed: 15 additions & 0 deletions
@@ -713,6 +713,21 @@ func (sp *StorePool) GetStores() map[roachpb.StoreID]roachpb.StoreDescriptor {
 	return stores
 }
 
+// GetStoreStatuses returns a map of store ID to store status for all known stores.
+// TODO(wenyihu6): optimize the allocation cost of this.
+func (sp *StorePool) GetStoreStatuses() map[roachpb.StoreID]StoreStatus {
+	now := sp.clock.Now()
+	timeUntilNodeDead := liveness.TimeUntilNodeDead.Get(&sp.st.SV)
+	timeAfterNodeSuspect := liveness.TimeAfterNodeSuspect.Get(&sp.st.SV)
+
+	result := make(map[roachpb.StoreID]StoreStatus)
+	sp.Details.StoreDetails.Range(func(storeID roachpb.StoreID, sd *StoreDetailMu) bool {
+		result[storeID] = sd.status(now, timeUntilNodeDead, sp.NodeLivenessFn, timeAfterNodeSuspect)
+		return true
+	})
+	return result
+}
+
 // GetStoreDetail returns the store detail for the given storeID.
 func (sp *StorePool) GetStoreDetail(storeID roachpb.StoreID) *StoreDetailMu {
 	detail, ok := sp.Details.StoreDetails.Load(storeID)
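As a rough illustration of the layering the commit message describes (the translation happens in the allocator-sync layer so mmaprototype never imports storepool), a hedged sketch follows. Only GetStoreStatuses and UpdateStoreStatus come from the diffs on this page; the translate helper and the interface shapes are hypothetical stand-ins, and the real work lives in mmaintegration's new store_status.go.

package sketch

import "context"

// Hypothetical local stand-ins for roachpb.StoreID, storepool.StoreStatus and
// mmaprototype.Status; the real types live in their respective packages.
type storeID int32
type storePoolStatus int
type mmaStatus struct{ Health, Leases, Replicas string }

// Narrow, hypothetical views of the two sides being glued together.
type storePoolView interface {
    // Mirrors the StorePool.GetStoreStatuses accessor added above.
    GetStoreStatuses() map[storeID]storePoolStatus
}

type mmaAllocator interface {
    // Mirrors the Allocator.UpdateStoreStatus method added earlier in this commit.
    UpdateStoreStatus(ctx context.Context, statuses map[storeID]mmaStatus)
}

// translate is a placeholder for the StoreStatus -> (Health, Disposition)
// mapping implemented in mmaintegration.
func translate(s storePoolStatus) mmaStatus { return mmaStatus{"ok", "ok", "ok"} }

// syncStoreStatuses sketches the allocator-sync step: read per-store statuses
// from the store pool, translate them, and push them into mma in one call.
func syncStoreStatuses(ctx context.Context, sp storePoolView, mma mmaAllocator) {
    in := sp.GetStoreStatuses()
    out := make(map[storeID]mmaStatus, len(in))
    for id, s := range in {
        out[id] = translate(s)
    }
    mma.UpdateStoreStatus(ctx, out)
}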
Lines changed: 62 additions & 0 deletions
# This test verifies MMA's write bandwidth rebalancing respects store status.
#
# Setup: 4 nodes, 40 ranges (RF=3). All replicas start on s1,s2,s3 with s1
# as leaseholder. s4 starts dead. Heavy write load (~19 MiB/s) creates
# pressure on all replicas. Replicate queue is disabled to isolate MMA behavior.
#
# Write bandwidth affects ALL replicas (not just leaseholder), so MMA should
# move replicas to balance write load. However, since s4 is dead, MMA should
# NOT move replicas there.
#
# Expected: MMA should avoid moving replicas to s4 (dead store).
gen_cluster nodes=4
----

setting split_queue_enabled=false
----

setting replicate_queue_enabled=false
----

# All ranges start on s1,s2,s3 with s1 as leaseholder. s4 has no replicas.
gen_ranges ranges=40 placement_type=replica_placement
{s1:*,s2,s3}:40
----
{s1:*,s2,s3}:40

# Write-heavy workload (rw_ratio=0 means 100% writes).
# High write bandwidth creates pressure on ALL replicas, not just leaseholder.
# This forces MMA to move replicas, not just leases.
gen_load rate=20000 rw_ratio=0 min_block=1000 max_block=1000
----
19 MiB/s goodput

# Node 4 becomes dead.
set_status node=4 liveness=dead
----

# Assert s4 (dead) should have 0 replicas.
assertion type=stat stat=replicas ticks=6 exact_bound=0 stores=(4)
----

eval duration=6m seed=42 metrics=(replicas,leases,write_bytes_per_second) cfgs=(mma-only)
----
leases#1: first: [s1=40, s2=0, s3=0, s4=0] (stddev=17.32, mean=10.00, sum=40)
leases#1: last: [s1=35, s2=0, s3=0, s4=5] (stddev=14.58, mean=10.00, sum=40)
leases#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%] (sum=0%)
replicas#1: first: [s1=40, s2=40, s3=40, s4=0] (stddev=17.32, mean=30.00, sum=120)
replicas#1: last: [s1=35, s2=40, s3=40, s4=5] (stddev=14.58, mean=30.00, sum=120)
replicas#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%] (sum=0%)
write_bytes_per_second#1: last: [s1=17489677, s2=19989215, s3=19989215, s4=2499537] (stddev=7284303.98, mean=14991911.00, sum=59967644)
write_bytes_per_second#1: thrash_pct: [s1=1%, s2=27%, s3=27%, s4=2%] (sum=57%)
artifacts[mma-only]: 50eedac335e03fb8
failed assertion sample 1
stat=replicas value=(=0.00) ticks=6
store=4 stat=5.00
store=4 stat=5.00
store=4 stat=5.00
store=4 stat=5.00
store=4 stat=5.00
store=4 stat=5.00
store=4 stat=5.00
==========================

pkg/kv/kvserver/mma_store_rebalancer.go

Lines changed: 1 addition & 0 deletions
@@ -125,6 +125,7 @@ func (m *mmaStoreRebalancer) start(ctx context.Context, stopper *stop.Stopper) {
 // rebalance may return true if errors happen in the process and fail to apply
 // the changes successfully.
 func (m *mmaStoreRebalancer) rebalance(ctx context.Context, periodicCall bool) bool {
+	m.mma.UpdateStoreStatus(ctx, m.as.GetMMAStoreStatuses())
 	knownStoresByMMA := m.mma.KnownStores()
 	storeLeaseholderMsg, numIgnoredRanges := m.store.MakeStoreLeaseholderMsg(ctx, knownStoresByMMA)
 	if numIgnoredRanges > 0 {
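To make the ordering explicit: statuses are refreshed synchronously at the top of each rebalance pass rather than polled from a background goroutine, so the subsequent change computation always sees current health and dispositions. Below is a hedged sketch of that control flow; the types are hypothetical stand-ins, and ComputeChanges is a placeholder for mma's change-computation entry point, whose real signature differs.

package sketch

import "context"

// Hypothetical stand-ins for roachpb.StoreID and mmaprototype.Status.
type storeID int32
type mmaStatus struct{ Health, Leases, Replicas string }

type statusSource interface {
    // Mirrors allocator sync's GetMMAStoreStatuses used in the hunk above.
    GetMMAStoreStatuses() map[storeID]mmaStatus
}

type allocator interface {
    UpdateStoreStatus(ctx context.Context, statuses map[storeID]mmaStatus)
    // Placeholder for mma's change computation; the real signature differs.
    ComputeChanges(ctx context.Context) (changes []string)
}

// rebalanceOnce sketches one pass: push fresh statuses, then compute changes,
// mirroring the plumbing added at the top of (*mmaStoreRebalancer).rebalance.
func rebalanceOnce(ctx context.Context, mma allocator, src statusSource) []string {
    mma.UpdateStoreStatus(ctx, src.GetMMAStoreStatuses())
    return mma.ComputeChanges(ctx)
}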

pkg/kv/kvserver/mmaintegration/BUILD.bazel

Lines changed: 8 additions & 1 deletion
@@ -7,6 +7,7 @@ go_library(
         "allocator_sync.go",
         "mma_conversion.go",
         "store_load_msg.go",
+        "store_status.go",
         "testing_knobs.go",
         "thrashing.go",
     ],
@@ -16,6 +17,7 @@ go_library(
         "//pkg/kv/kvpb",
         "//pkg/kv/kvserver/allocator",
         "//pkg/kv/kvserver/allocator/mmaprototype",
+        "//pkg/kv/kvserver/allocator/storepool",
         "//pkg/kv/kvserver/kvserverbase",
         "//pkg/roachpb",
         "//pkg/settings/cluster",
@@ -28,15 +30,20 @@ go_library(
 
 go_test(
     name = "mmaintegration_test",
-    srcs = ["mma_conversion_test.go"],
+    srcs = [
+        "mma_conversion_test.go",
+        "store_status_test.go",
+    ],
     data = glob(["testdata/**"]),
     embed = [":mmaintegration"],
     deps = [
         "//pkg/kv/kvpb",
         "//pkg/kv/kvserver/allocator",
         "//pkg/kv/kvserver/allocator/mmaprototype",
+        "//pkg/kv/kvserver/allocator/storepool",
         "//pkg/roachpb",
         "//pkg/testutils/datapathutils",
+        "//pkg/testutils/echotest",
         "//pkg/util/leaktest",
         "//pkg/util/log",
         "@com_github_cockroachdb_datadriven//:datadriven",
