Skip to content

Commit bf0742d

Browse files
craig[bot]sumeerbholayuzefovich
committed
158008: mma: improve commentary and test cases for config normalization r=sumeerbhola a=sumeerbhola Epic: CRDB-55052 Release note: None 158026: logictest: adjust recent test for high vmodule config r=yuzefovich a=yuzefovich As it turns out, with high vmodule on `plan_opt` file we include the query string into each logging message. We recently added some tests for canary stats that verify the query cache behavior, and we need to adjust those to ignore the optional suffix that contains the stmt. Fixes: #157981 Fixes: #157982 Fixes: #157983 Fixes: #157984 Fixes: #157985 Fixes: #157986 Fixes: #157987 Fixes: #157988 Fixes: #157989 Release note: None Co-authored-by: sumeerbhola <[email protected]> Co-authored-by: Yahor Yuzefovich <[email protected]>
3 parents b724895 + 8f03a96 + cd2172d commit bf0742d

File tree

4 files changed

+333
-37
lines changed

4 files changed

+333
-37
lines changed

pkg/kv/kvserver/allocator/mmaprototype/cluster_state.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,8 +1663,18 @@ func (cs *clusterState) processStoreLeaseholderMsgInternal(
16631663
if rangeMsg.MaybeSpanConfIsPopulated {
16641664
normSpanConfig, err := makeNormalizedSpanConfig(&rangeMsg.MaybeSpanConf, cs.constraintMatcher.interner)
16651665
if err != nil {
1666-
// TODO(kvoli): Should we log as a warning here, or return further back out?
1667-
panic(err)
1666+
if normSpanConfig == nil {
1667+
// TODO: the roachpb.SpanConfig violated the basic requirements
1668+
// documented in the proto. We need to ensure that the checks that
1669+
// happened here are also done when the user set the ZoneConfig, so
1670+
// that we can reject such violations up front. At this point in the
1671+
// code we have no way of continuing, so we panic, but we must ensure
1672+
// we never get here in production for user specified input.
1673+
panic(err)
1674+
} else {
1675+
log.KvDistribution.Warningf(
1676+
ctx, "range r%v span config had errors in normalization: %v", rangeMsg.RangeID, err)
1677+
}
16681678
}
16691679
rs.conf = normSpanConfig
16701680
}

pkg/kv/kvserver/allocator/mmaprototype/constraint.go

Lines changed: 125 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,87 @@ func (ic internedConstraint) less(b internedConstraint) bool {
128128
// constraints are in increasing order using internedConstraint.less.
129129
type constraintsConj []internedConstraint
130130

131+
// conjunctionRelationship describes the binary relationship between sets of
132+
// stores satisfying two constraint conjunctions. A constraint conjunction
133+
// (constraintsConj) represents all possible sets of stores that satisfy all
134+
// constraints in the conjunction. For example, the conjunction [+region=a,
135+
// +zone=a1] represents all possible sets of stores located in region "a" and
136+
// zone "a1".
137+
//
138+
// This relationship (between two constraintsConj denoted A and B in their
139+
// enum comments) is computed purely by comparing the conjuncts of A and B,
140+
// without knowledge of which stores actually exist in the cluster. In some
141+
// cases, specifically conjEqualSet, conjStrictSubset, conjStrictSuperset,
142+
// these can be understood to hold for any possible concrete set of stores. In
143+
// other cases, specifically conjNonIntersecting and conjPossiblyIntersecting,
144+
// these are a best-effort guess based on the structure of the conjuncts
145+
// alone.
146+
//
147+
// The best-effort relationship computation assumes that if two conjuncts are
148+
// not equal their sets are non-overlapping. This simplifying assumption is ok
149+
// since this relationship feeds into a best-effort structural normalization.
131150
type conjunctionRelationship int
132151

133-
// Relationship between conjunctions used for structural normalization. This
134-
// relationship is solely defined based on the conjunctions, and not based on
135-
// what stores actually match. It simply assumes that if two conjuncts are not
136-
// equal their sets are non-overlapping. This simplifying assumption is made
137-
// since we are only trying to do a best-effort structural normalization.
138152
const (
139-
conjIntersecting conjunctionRelationship = iota
153+
// conjPossiblyIntersecting: This is an indeterminate case: The structural
154+
// relationship alone cannot determine how the store sets relate; the actual
155+
// set of stores in the cluster would need to be known. The store sets may
156+
// be disjoint, overlapping, or even have a subset relationship, depending
157+
// on which stores actually exist. In practice, this is the case in which A
158+
// and B share some constraints, but differ in others.
159+
//
160+
// Example: A=[+region=a, +zone=a1], B=[+region=a, -zone=a2].
161+
// - if all stores are in a2, B is empty and thus a subset of A.
162+
// - if all stores are in a1, A and B are equal.
163+
// - if only a1 and a2 have stores, then -zone=a2 is equivalent to +zone=a1,
164+
// so A and B are equal.
165+
// - if only a2 and a3 have stores, then A is empty and thus a strict
166+
// subset of B, which is not empty.
167+
// - if a1, a2, and a3 have stores, then A and B intersect but are not equal:
168+
// - stores in a1 match both A and B.
169+
// - stores in a2 match neither constraint
170+
// - stores in a3 match only B
171+
// and thus B is a strict subset of A.
172+
// Note how the relationship between A and B varies based on the actual set of
173+
// stores: sometimes B contains A, and sometimes the other way around.
174+
conjPossiblyIntersecting conjunctionRelationship = iota
175+
176+
// conjEqualSet: A and B have identical constraint lists, so they represent
177+
// the same set of stores.
178+
//
179+
// Example: A=[+region=a, +zone=a1], B=A
140180
conjEqualSet
181+
182+
// conjStrictSubset: A has more constraints than B, so A's store set is a
183+
// strict subset of B's store set. Every store matching A also matches B,
184+
// but some stores matching B do not match A.
185+
//
186+
// Example: A=[+region=a, +zone=a1], B=[+region=a]
187+
// A matches stores in region=a AND zone=a1.
188+
// B matches all stores in region=a (any zone).
189+
// Therefore, A's store set ⊆ B's store set.
141190
conjStrictSubset
191+
192+
// conjStrictSuperset: A has fewer constraints than B, so A's store set is a
193+
// strict superset of B's store set. Every store matching B also matches A,
194+
// but some stores matching A do not match B.
195+
//
196+
// Example: A=[+region=a], B=[+region=a, +zone=a1]
197+
// A matches all stores in region=a.
198+
// B matches only stores in region=a AND zone=a1.
199+
// Therefore, A's store set ⊇ B's store set.
142200
conjStrictSuperset
201+
202+
// conjNonIntersecting: This is another indeterminate case. A and B have
203+
// no constraints in common, so we assume their store sets are disjoint
204+
// (no store can match both).
205+
//
206+
// Example: A=[+region=a], B=[+region=b]
207+
// A matches stores in region=a, B matches stores in region=b.
208+
// Since a store cannot be in both regions, the sets are disjoint.
209+
// Example: A=[+region=a], B=[+zone=a1]
210+
// If zone=a1 happens to be in region=a, then the disjoint result is
211+
// not correct.
143212
conjNonIntersecting
144213
)
145214

@@ -191,7 +260,7 @@ func (cc constraintsConj) relationship(b constraintsConj) conjunctionRelationshi
191260
}
192261
// (extraInB > 0 && extraInCC > 0)
193262
if inBoth > 0 {
194-
return conjIntersecting
263+
return conjPossiblyIntersecting
195264
}
196265
return conjNonIntersecting
197266
}
@@ -220,6 +289,19 @@ type internedLeasePreference struct {
220289
// SpanConfig for which we don't have a normalized value. The rest of the
221290
// allocator code works with normalizedSpanConfig. Due to the infrequent
222291
// nature of this, we don't attempt to reduce memory allocations.
292+
//
293+
// It does:
294+
//
295+
// - Basic normalization of the constraints and voterConstraints, to specify
296+
// the number of replicas for every constraints conjunction, and ensure that
297+
// the sum adds up to the required number of replicas. These are
298+
// requirements documented in roachpb.SpanConfig. If this basic
299+
// normalization fails, it returns a nil normalizedSpanConfig and an error.
300+
//
301+
// - Structural normalization: see comment in doStructuralNormalization. An
302+
// error in this step causes it to return a non-nil normalizedSpanConfig,
303+
// with an error, so that the error can be surfaced somehow while keeping
304+
// the system running.
223305
func makeNormalizedSpanConfig(
224306
conf *roachpb.SpanConfig, interner *stringInterner,
225307
) (*normalizedSpanConfig, error) {
@@ -263,9 +345,16 @@ func makeNormalizedSpanConfig(
263345
leasePreferences: lps,
264346
interner: interner,
265347
}
266-
return doStructuralNormalization(nConf)
348+
err = doStructuralNormalization(nConf)
349+
return nConf, err
267350
}
268351

352+
// normalizeConstraints normalizes and interns the given constraints. Every
353+
// internedConstraintsConjunction has numReplicas > 0, and the sum of these
354+
// equals the parameter numReplicas.
355+
//
356+
// It returns an error if the input constraints don't satisfy the requirements
357+
// documented in roachpb.SpanConfig related to NumReplicas.
269358
func normalizeConstraints(
270359
constraints []roachpb.ConstraintsConjunction, numReplicas int32, interner *stringInterner,
271360
) ([]internedConstraintsConjunction, error) {
@@ -322,13 +411,14 @@ func normalizeConstraints(
322411
// "strictness" comment in roachpb.SpanConfig which now requires users to
323412
// repeat the information).
324413
//
325-
// This function does some structural normalization even when returning an
326-
// error. See the under-specified voter constraint examples in the datadriven
327-
// test -- we sometimes see these in production settings, and we want to fix
328-
// ones that we can, and raise an error for users to fix their configs.
329-
func doStructuralNormalization(conf *normalizedSpanConfig) (*normalizedSpanConfig, error) {
414+
// This function mutates the conf argument. It does some structural
415+
// normalization even when returning an error. See the under-specified voter
416+
// constraint examples in the datadriven test -- we sometimes see these in
417+
// production settings, and we want to fix ones that we can, and raise an
418+
// error for users to fix their configs.
419+
func doStructuralNormalization(conf *normalizedSpanConfig) error {
330420
if len(conf.constraints) == 0 || len(conf.voterConstraints) == 0 {
331-
return conf, nil
421+
return nil
332422
}
333423
// Relationships between each voter constraint and each all replica
334424
// constraint.
@@ -359,9 +449,25 @@ func doStructuralNormalization(conf *normalizedSpanConfig) (*normalizedSpanConfi
359449
slices.SortFunc(rels, func(a, b relationshipVoterAndAll) int {
360450
return cmp.Compare(a.voterAndAllRel, b.voterAndAllRel)
361451
})
362-
// First are the intersecting constraints, which cause an error.
452+
// First are the intersecting constraints, which cause an error (though we
453+
// proceed with normalization). This is because when there are both shared
454+
// and non shared conjuncts, we cannot be certain that the conjunctions are
455+
// non-intersecting. When the conjunctions are intersecting, we cannot
456+
// promote from one to the other to fill out the set of conjunctions.
457+
//
458+
// Example 1: +region=a,+zone=a1 and +region=a,+zone=a2 are classified as
459+
// conjPossiblyIntersecting, but we could do better in knowing that the
460+
// conjunctions are non-intersecting since zone=a1 and zone=a2 are disjoint.
461+
//
462+
// TODO(sumeer): improve the case of example 1.
463+
//
464+
// Example 2: +region=a,+zone=a1 and +region=a,-zone=a2 are classified as
465+
// conjPossiblyIntersecting. And if there happens to be a zone=a3 in the
466+
// region, they are actually intersecting. We cannot do better since we
467+
// don't know the semantics of regions, zones or the universe of possible
468+
// values of the zone.
363469
index := 0
364-
for rels[index].voterAndAllRel == conjIntersecting {
470+
for rels[index].voterAndAllRel == conjPossiblyIntersecting {
365471
index++
366472
}
367473
var err error
@@ -583,9 +689,9 @@ func doStructuralNormalization(conf *normalizedSpanConfig) (*normalizedSpanConfi
583689
slices.SortFunc(rels, func(a, b relationshipVoterAndAll) int {
584690
return cmp.Compare(a.voterAndAllRel, b.voterAndAllRel)
585691
})
586-
// Ignore conjIntersecting.
692+
// Ignore conjPossiblyIntersecting.
587693
index = 0
588-
for rels[index].voterAndAllRel == conjIntersecting {
694+
for rels[index].voterAndAllRel == conjPossiblyIntersecting {
589695
index++
590696
}
591697
voterConstraintHasEqualityWithConstraint := make([]bool, len(conf.voterConstraints))
@@ -647,7 +753,7 @@ func doStructuralNormalization(conf *normalizedSpanConfig) (*normalizedSpanConfi
647753
}
648754
}
649755

650-
return conf, err
756+
return err
651757
}
652758

653759
type replicaKindIndex int32

0 commit comments

Comments
 (0)