Skip to content

Commit 1128917

Browse files
craig[bot]yuzefovich
andcommitted
Merge #143858
143858: stats: harden bucket adjustment for over-estimates r=yuzefovich a=yuzefovich We recently saw a case where we produced histogram buckets that contained NaNs. This happened because we had an over-estimate of the distinct count which was impossible to satisfy (3+ distinct count for boolean column). This commit hardens the bucket adjustment code to avoid generating NaNs in such scenarios. In particular, it audits all division arithmetic we do when building histograms to ensure that we never attempt to divide by zero - I found only one spot where it seems plausible in such "impossible to satisfy" request. Fixes: #142022. Release note: None Co-authored-by: Yahor Yuzefovich <[email protected]>
2 parents 446c5a1 + 10a2ffe commit 1128917

File tree

2 files changed

+40
-4
lines changed

2 files changed

+40
-4
lines changed

pkg/sql/stats/histogram.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,10 @@ type histogram struct {
448448
// to equal the total row count and estimated distinct count. The total row
449449
// count and estimated distinct count should not include NULL values, and the
450450
// histogram should not contain any buckets for NULL values.
451+
//
452+
// NB: it is **not** guaranteed that the returned histogram will contain the
453+
// specified distinct count (e.g., if distinctCountTotal of 3 or more is asked
454+
// from the boolean histogram).
451455
func (h *histogram) adjustCounts(
452456
ctx context.Context,
453457
compareCtx tree.CompareContext,
@@ -550,7 +554,7 @@ func (h *histogram) adjustCounts(
550554
// values in the bucket.
551555
inc = remDistinctCount * (maxDistRange / maxDistinctCountRange)
552556
// If the bucket has DistinctRange > maxDistRange (a rare but possible
553-
// occurence, see #93892) then inc will be negative. Prevent this.
557+
// occurrence, see #93892) then inc will be negative. Prevent this.
554558
if inc < 0 {
555559
inc = 0
556560
}
@@ -707,6 +711,9 @@ func getMaxVal(
707711
// histogram to include the remaining distinct values in remDistinctCount. It
708712
// also increments the counters rowCountEq, distinctCountEq, rowCountRange, and
709713
// distinctCountRange as needed.
714+
//
715+
// NB: it is **not** guaranteed that all remaining distinct values will be
716+
// covered.
710717
func (h *histogram) addOuterBuckets(
711718
ctx context.Context,
712719
compareCtx tree.CompareContext,
@@ -777,9 +784,13 @@ func (h *histogram) addOuterBuckets(
777784

778785
inc := avgRemPerBucket
779786
if countable {
780-
// Set the increment proportional to the remaining number of distinct
781-
// values in the bucket.
782-
inc = remDistinctCount * (maxDistRange / maxDistinctCountExtraBuckets)
787+
if maxDistinctCountExtraBuckets > 0 {
788+
// Set the increment proportional to the remaining number of distinct
789+
// values in the bucket.
790+
inc = remDistinctCount * (maxDistRange / maxDistinctCountExtraBuckets)
791+
} else {
792+
inc = 0
793+
}
783794
if inc < 0 {
784795
inc = 0
785796
}

pkg/sql/stats/histogram_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,31 @@ func TestAdjustCounts(t *testing.T) {
940940
{NumRange: 0, NumEq: 100, DistinctRange: 0, UpperBound: d(-60)},
941941
},
942942
},
943+
{ // Over-estimate of distinct count for bools when both values were
944+
// already sampled.
945+
h: []cat.HistogramBucket{
946+
{NumRange: 0, NumEq: 1, DistinctRange: 0, UpperBound: tree.DBoolFalse},
947+
{NumRange: 0, NumEq: 1, DistinctRange: 0, UpperBound: tree.DBoolTrue},
948+
},
949+
rowCount: 4,
950+
distinctCount: 4,
951+
expected: []cat.HistogramBucket{
952+
{NumRange: 0, NumEq: 2, DistinctRange: 0, UpperBound: tree.DBoolFalse},
953+
{NumRange: 0, NumEq: 2, DistinctRange: 0, UpperBound: tree.DBoolTrue},
954+
},
955+
},
956+
{ // Over-estimate of distinct count for bools when only 'false' was
957+
// already sampled (#142022).
958+
h: []cat.HistogramBucket{
959+
{NumRange: 0, NumEq: 1, DistinctRange: 0, UpperBound: tree.DBoolFalse},
960+
},
961+
rowCount: 4,
962+
distinctCount: 4,
963+
expected: []cat.HistogramBucket{
964+
{NumRange: 0, NumEq: 2, DistinctRange: 0, UpperBound: tree.DBoolFalse},
965+
{NumRange: 0, NumEq: 2, DistinctRange: 0, UpperBound: tree.DBoolTrue},
966+
},
967+
},
943968
}
944969

945970
ctx := context.Background()

0 commit comments

Comments
 (0)