Skip to content

Commit fa99540

Browse files
craig[bot]tbgpav-kv
committed
156938: mmaprototype: introduce store status r=tbg a=tbg This introduces the `Status` struct and hooks it up to the `TestClusterState` test. It doesn't yet attach any actual semantics to the new member of `storeStatus` that is being added as part of doing so. This will happen in follow-up commits, loosely replacing the integration points removed in #156933. Touches #156776. Epic: CRDB-55052 156999: kvserver: deflake TestPromoteNonVoterInAddVoter r=tbg a=pav-kv Fixes #156701 Co-authored-by: Tobias Grieger <[email protected]> Co-authored-by: Pavel Kalinnikov <[email protected]>
3 parents 486ae5d + 3e61da3 + 77832f1 commit fa99540

File tree

10 files changed

+397
-44
lines changed

10 files changed

+397
-44
lines changed

pkg/kv/kvserver/allocator/mmaprototype/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ go_library(
1515
"rebalance_advisor.go",
1616
"store_load_summary.go",
1717
"store_set.go",
18+
"store_status.go",
1819
"top_k_replicas.go",
1920
],
2021
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/mmaprototype",

pkg/kv/kvserver/allocator/mmaprototype/cluster_state.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ type pendingReplicaChange struct {
600600
// storeState maintains the complete state about a store as known to the
601601
// allocator.
602602
type storeState struct {
603+
status Status
603604
storeLoad
604605
StoreAttributesAndLocality
605606
adjusted struct {
@@ -1822,6 +1823,11 @@ func (cs *clusterState) setStore(sal StoreAttributesAndLocality) {
18221823
if !ok {
18231824
// This is the first time seeing this store.
18241825
ss := newStoreState()
1826+
// At this point, the store's health is unknown. It will need to be marked
1827+
// as healthy separately. Until we know more, we won't place leases or
1828+
// replicas on it (nor will we try to shed any that are already reported to
1829+
// have replicas on it).
1830+
ss.status = MakeStatus(HealthUnknown, LeaseDispositionRefusing, ReplicaDispositionRefusing)
18251831
ss.localityTiers = cs.localityTierInterner.intern(sal.locality())
18261832
ss.overloadStartTime = cs.ts.Now()
18271833
ss.overloadEndTime = cs.ts.Now()

pkg/kv/kvserver/allocator/mmaprototype/cluster_state_test.go

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,53 @@ func parseSecondaryLoadVector(t *testing.T, in string) SecondaryLoadVector {
6363
return vec
6464
}
6565

66+
func parseStatusFromArgs(t *testing.T, d *datadriven.TestData) Status {
67+
var status Status
68+
if d.HasArg("health") {
69+
healthStr := dd.ScanArg[string](t, d, "health")
70+
found := false
71+
for i := Health(0); i < healthCount; i++ {
72+
if i.String() == healthStr {
73+
status.Health = i
74+
found = true
75+
break
76+
}
77+
}
78+
if !found {
79+
t.Fatalf("unknown health: %s", healthStr)
80+
}
81+
}
82+
if d.HasArg("leases") {
83+
leaseStr := dd.ScanArg[string](t, d, "leases")
84+
found := false
85+
for i := LeaseDisposition(0); i < leaseDispositionCount; i++ {
86+
if i.String() == leaseStr {
87+
status.Disposition.Lease = i
88+
found = true
89+
break
90+
}
91+
}
92+
if !found {
93+
t.Fatalf("unknown lease disposition: %s", leaseStr)
94+
}
95+
}
96+
if d.HasArg("replicas") {
97+
replicaStr := dd.ScanArg[string](t, d, "replicas")
98+
found := false
99+
for i := ReplicaDisposition(0); i < replicaDispositionCount; i++ {
100+
if i.String() == replicaStr {
101+
status.Disposition.Replica = i
102+
found = true
103+
break
104+
}
105+
}
106+
if !found {
107+
t.Fatalf("unknown replica disposition: %s", replicaStr)
108+
}
109+
}
110+
return status
111+
}
112+
66113
func parseStoreLoadMsg(t *testing.T, in string) StoreLoadMsg {
67114
var msg StoreLoadMsg
68115
for _, v := range strings.Fields(in) {
@@ -333,8 +380,10 @@ func TestClusterState(t *testing.T) {
333380
ss := cs.stores[storeID]
334381
ns := cs.nodes[ss.NodeID]
335382
fmt.Fprintf(&buf,
336-
"store-id=%v node-id=%v reported=%v adjusted=%v node-reported-cpu=%v node-adjusted-cpu=%v seq=%d\n",
337-
ss.StoreID, ss.NodeID, ss.reportedLoad, ss.adjusted.load, ns.ReportedCPU, ns.adjustedCPU, ss.loadSeqNum,
383+
"store-id=%v node-id=%v status=%s reported=%v adjusted=%v node-reported-cpu=%v node-adjusted-cpu=%v seq"+
384+
"=%d\n",
385+
ss.StoreID, ss.NodeID, ss.status, ss.reportedLoad, ss.adjusted.load, ns.ReportedCPU, ns.adjustedCPU,
386+
ss.loadSeqNum,
338387
)
339388
for ls, topk := range ss.adjusted.topKRanges {
340389
n := topk.len()
@@ -354,9 +403,22 @@ func TestClusterState(t *testing.T) {
354403
for _, next := range strings.Split(d.Input, "\n") {
355404
sal := parseStoreAttributedAndLocality(t, next)
356405
cs.setStore(sal)
406+
// For convenience, in these tests, stores start out
407+
// healthy.
408+
cs.stores[sal.StoreID].status = Status{Health: HealthOK}
357409
}
358410
return printNodeListMeta()
359411

412+
case "set-store-status":
413+
storeID := dd.ScanArg[roachpb.StoreID](t, d, "store-id")
414+
ss, ok := cs.stores[storeID]
415+
if !ok {
416+
t.Fatalf("store %d not found", storeID)
417+
}
418+
status := parseStatusFromArgs(t, d)
419+
ss.status = MakeStatus(status.Health, status.Disposition.Lease, status.Disposition.Replica)
420+
return ss.status.String()
421+
360422
case "store-load-msg":
361423
msg := parseStoreLoadMsg(t, d.Input)
362424
cs.processStoreLoadMsg(context.Background(), &msg)
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package mmaprototype
7+
8+
import (
9+
"github.com/cockroachdb/errors"
10+
"github.com/cockroachdb/redact"
11+
)
12+
13+
type (
14+
// Health represents whether the store "is around", i.e. able to participate
15+
// in replication. What this means exactly is left to the outside world. MMA
16+
// relies on the Disposition as much as possible without consulting the health
17+
// status, aided by the invariant described on Status (only healthy nodes can
18+
// have "green" dispositions). Its only (envisioned) use for explicit health
19+
// tracking are sanity checks and preferring unhealthy candidates when picking
20+
// sources (for example, when rebalancing, it may make sense to rebalance off
21+
// an unhealthy or dead node over a healthy one as this actually improves
22+
// availability).
23+
Health byte
24+
LeaseDisposition byte
25+
ReplicaDisposition byte
26+
)
27+
28+
const (
29+
// HealthUnknown is the initial state for health. It's treated as an unhealthy
30+
// state, distinguished from HealthUnhealthy to tailor logging and metrics.
31+
HealthUnknown Health = iota
32+
// HealthOK describes a store that has been determined to be alive and well.
33+
HealthOK
34+
// HealthUnhealthy describes a store that has recently become unhealthy, but
35+
// which has not been unhealthy for "extended periods of time" and is expected
36+
// to return (as determined by a higher layer)
37+
HealthUnhealthy
38+
// HealthDead describes a store that is unhealthy and has been in that state
39+
// for what is considered a "long time" or which may not return.
40+
HealthDead
41+
healthCount // sentinel
42+
)
43+
44+
const (
45+
// LeaseDispositionOK denotes that the store is in principle willing to
46+
// receive leases.
47+
LeaseDispositionOK LeaseDisposition = iota
48+
// LeaseDispositionRefusing denotes that the store should not be considered
49+
// as a target for leases.
50+
LeaseDispositionRefusing
51+
// LeaseDispositionShedding denotes that the store should actively be
52+
// considered for removal of leases.
53+
LeaseDispositionShedding
54+
leaseDispositionCount // sentinel
55+
)
56+
57+
const (
58+
// ReplicaDispositionRefusing denotes that the store can receive replicas.
59+
ReplicaDispositionOK ReplicaDisposition = iota
60+
// ReplicaDispositionRefusing denotes that the store should not be considered as
61+
// a target for replica placement.
62+
ReplicaDispositionRefusing
63+
// ReplicaDispositionShedding denotes that the store should actively be
64+
// considered for removal of replicas.
65+
ReplicaDispositionShedding
66+
replicaDispositionCount // sentinel
67+
)
68+
69+
// Disposition represents the interest a store has in accepting or
70+
// shedding leases or replicas.
71+
type Disposition struct {
72+
Lease LeaseDisposition
73+
Replica ReplicaDisposition
74+
}
75+
76+
// Status represents the health and disposition of a store. It serves as a
77+
// translation layer between the "messy" outside world in which we may have
78+
// multiple possibly conflicting health signals, and various lifecycle states.
79+
// The Status is never derived internally in mma, but is always passed in and
80+
// kept up to date through the Allocator interface.
81+
//
82+
// The contained Health primarily plays a role when looking for (lease or
83+
// replicas) sources: all other things being equal, we want to be able to
84+
// identify suspect or dead stores as more beneficial sources for, say, a
85+
// replica removal. We also want to be able to tell when a range has lost quorum
86+
// so that we don't even try to make changes in that case. On the flip side, due
87+
// to the INVARIANT below, when looking for targets, health does not typically
88+
// need to be considered as we prohibit "non-healthy" stores from claiming to
89+
// accept replicas or leases.
90+
//
91+
// The wrapped Disposition is important when looking for both targets and
92+
// sources. In principle, stores can reject leases but accept replicas and vice
93+
// versa, though not all combinations may be used in practice (because they do
94+
// not all make sense).
95+
//
96+
// The multi-metric allocator is not currently responsible for establishing
97+
// constraint conformance or for shedding, so not all aspects discussed above
98+
// are necessarily used by it yet.
99+
//
100+
// INVARIANT: if Health != HealthOK, Disposition.{Lease,Replica} = Shedding.
101+
type Status struct {
102+
Health Health
103+
Disposition Disposition
104+
}
105+
106+
func MakeStatus(h Health, l LeaseDisposition, r ReplicaDisposition) Status {
107+
s := Status{
108+
Health: h,
109+
Disposition: Disposition{Lease: l, Replica: r},
110+
}
111+
s.assertOrPanic()
112+
return s
113+
}
114+
115+
func (ss *Status) assert() error {
116+
// NB: byte values can't be negative, so we don't have to check.
117+
118+
if ss.Health >= healthCount {
119+
return errors.AssertionFailedf("invalid health status %d", ss.Health)
120+
}
121+
d := ss.Disposition
122+
if d.Lease >= leaseDispositionCount {
123+
return errors.AssertionFailedf("invalid lease disposition %d", d.Lease)
124+
}
125+
if d.Replica >= replicaDispositionCount {
126+
return errors.AssertionFailedf("invalid replica disposition %d", d.Replica)
127+
}
128+
if ss.Health != HealthOK && (ss.Disposition.Replica == ReplicaDispositionOK || ss.Disposition.Lease == LeaseDispositionOK) {
129+
return errors.AssertionFailedf("non-healthy status must not accept leases or replicas: %+v", ss)
130+
}
131+
return nil
132+
}
133+
134+
func (ss *Status) assertOrPanic() {
135+
if err := ss.assert(); err != nil {
136+
panic(err)
137+
}
138+
}
139+
140+
func (h Health) String() string {
141+
return redact.StringWithoutMarkers(h)
142+
}
143+
144+
// SafeFormat implements the redact.SafeFormatter interface.
145+
func (h Health) SafeFormat(w redact.SafePrinter, _ rune) {
146+
switch h {
147+
case HealthUnknown:
148+
w.Print("unknown")
149+
case HealthOK:
150+
w.Print("ok")
151+
case HealthUnhealthy:
152+
w.Print("unhealthy")
153+
case HealthDead:
154+
w.Print("dead")
155+
default:
156+
w.Print("unknown")
157+
}
158+
}
159+
160+
func (l LeaseDisposition) String() string {
161+
return redact.StringWithoutMarkers(l)
162+
}
163+
164+
// SafeFormat implements the redact.SafeFormatter interface.
165+
func (l LeaseDisposition) SafeFormat(w redact.SafePrinter, _ rune) {
166+
switch l {
167+
case LeaseDispositionOK:
168+
w.Print("ok")
169+
case LeaseDispositionRefusing:
170+
w.Print("refusing")
171+
case LeaseDispositionShedding:
172+
w.Print("shedding")
173+
default:
174+
w.Print("unknown")
175+
}
176+
}
177+
178+
func (r ReplicaDisposition) String() string {
179+
return redact.StringWithoutMarkers(r)
180+
}
181+
182+
// SafeFormat implements the redact.SafeFormatter interface.
183+
func (r ReplicaDisposition) SafeFormat(w redact.SafePrinter, _ rune) {
184+
switch r {
185+
case ReplicaDispositionOK:
186+
w.Print("ok")
187+
case ReplicaDispositionRefusing:
188+
w.Print("refusing")
189+
case ReplicaDispositionShedding:
190+
w.Print("shedding")
191+
default:
192+
w.Print("unknown")
193+
}
194+
}
195+
196+
func (d Disposition) String() string {
197+
return redact.StringWithoutMarkers(d)
198+
}
199+
200+
// SafeFormat implements the redact.SafeFormatter interface.
201+
func (d Disposition) SafeFormat(w redact.SafePrinter, _ rune) {
202+
if d.Lease == LeaseDispositionOK && d.Replica == ReplicaDispositionOK {
203+
w.Print("accepting all")
204+
return
205+
}
206+
207+
var refusing []redact.SafeString
208+
var shedding []redact.SafeString
209+
210+
if d.Lease == LeaseDispositionRefusing {
211+
refusing = append(refusing, redact.SafeString("leases"))
212+
} else if d.Lease == LeaseDispositionShedding {
213+
shedding = append(shedding, redact.SafeString("leases"))
214+
}
215+
216+
if d.Replica == ReplicaDispositionRefusing {
217+
refusing = append(refusing, redact.SafeString("replicas"))
218+
} else if d.Replica == ReplicaDispositionShedding {
219+
shedding = append(shedding, redact.SafeString("replicas"))
220+
}
221+
222+
first := true
223+
if len(refusing) > 0 {
224+
w.Print("refusing=")
225+
for i, s := range refusing {
226+
if i > 0 {
227+
w.Print(",")
228+
}
229+
w.Print(s)
230+
}
231+
first = false
232+
}
233+
if len(shedding) > 0 {
234+
if !first {
235+
w.Print(" ")
236+
}
237+
w.Print("shedding=")
238+
for i, s := range shedding {
239+
if i > 0 {
240+
w.Print(",")
241+
}
242+
w.Print(s)
243+
}
244+
}
245+
}
246+
247+
func (ss Status) String() string {
248+
return redact.StringWithoutMarkers(ss)
249+
}
250+
251+
// SafeFormat implements the redact.SafeFormatter interface.
252+
func (ss Status) SafeFormat(w redact.SafePrinter, _ rune) {
253+
ss.Health.SafeFormat(w, 's')
254+
w.Print(" ")
255+
ss.Disposition.SafeFormat(w, 's')
256+
}

0 commit comments

Comments
 (0)