Skip to content

Commit 93721ac

Browse files
authored
Merge pull request #149625 from cockroachdb/blathers/backport-release-25.2-149279
release-25.2: stats: improve handling of some expected errors
2 parents 481da68 + 633a7bd commit 93721ac

File tree

8 files changed

+46
-26
lines changed

8 files changed

+46
-26
lines changed

docs/generated/settings/settings-for-tenants.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ sql.stats.multi_column_collection.enabled boolean true multi-column statistics c
369369
sql.stats.non_default_columns.min_retention_period duration 24h0m0s minimum retention period for table statistics collected on non-default columns application
370370
sql.stats.non_indexed_json_histograms.enabled boolean false set to true to collect table statistics histograms on non-indexed JSON columns application
371371
sql.stats.persisted_rows.max integer 1000000 maximum number of rows of statement and transaction statistics that will be persisted in the system tables before compaction begins application
372-
sql.stats.post_events.enabled boolean false if set, an event is logged for every CREATE STATISTICS job application
372+
sql.stats.post_events.enabled boolean false if set, an event is logged for every successful CREATE STATISTICS job application
373373
sql.stats.response.max integer 20000 the maximum number of statements and transaction stats returned in a CombinedStatements request application
374374
sql.stats.response.show_internal.enabled boolean false controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. These endpoints are used to display statistics on the SQL Activity pages application
375375
sql.stats.system_tables.enabled boolean true when true, enables use of statistics on system tables by the query optimizer application

docs/generated/settings/settings.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@
324324
<tr><td><div id="setting-sql-stats-non-default-columns-min-retention-period" class="anchored"><code>sql.stats.non_default_columns.min_retention_period</code></div></td><td>duration</td><td><code>24h0m0s</code></td><td>minimum retention period for table statistics collected on non-default columns</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
325325
<tr><td><div id="setting-sql-stats-non-indexed-json-histograms-enabled" class="anchored"><code>sql.stats.non_indexed_json_histograms.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>set to true to collect table statistics histograms on non-indexed JSON columns</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
326326
<tr><td><div id="setting-sql-stats-persisted-rows-max" class="anchored"><code>sql.stats.persisted_rows.max</code></div></td><td>integer</td><td><code>1000000</code></td><td>maximum number of rows of statement and transaction statistics that will be persisted in the system tables before compaction begins</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
327-
<tr><td><div id="setting-sql-stats-post-events-enabled" class="anchored"><code>sql.stats.post_events.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>if set, an event is logged for every CREATE STATISTICS job</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
327+
<tr><td><div id="setting-sql-stats-post-events-enabled" class="anchored"><code>sql.stats.post_events.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>if set, an event is logged for every successful CREATE STATISTICS job</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
328328
<tr><td><div id="setting-sql-stats-response-max" class="anchored"><code>sql.stats.response.max</code></div></td><td>integer</td><td><code>20000</code></td><td>the maximum number of statements and transaction stats returned in a CombinedStatements request</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
329329
<tr><td><div id="setting-sql-stats-response-show-internal-enabled" class="anchored"><code>sql.stats.response.show_internal.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>controls if statistics for internal executions should be returned by the CombinedStatements and if internal sessions should be returned by the ListSessions endpoints. These endpoints are used to display statistics on the SQL Activity pages</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
330330
<tr><td><div id="setting-sql-stats-system-tables-enabled" class="anchored"><code>sql.stats.system_tables.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>when true, enables use of statistics on system tables by the query optimizer</td><td>Serverless/Dedicated/Self-Hosted</td></tr>

pkg/sql/create_stats.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ import (
4444
var createStatsPostEvents = settings.RegisterBoolSetting(
4545
settings.ApplicationLevel,
4646
"sql.stats.post_events.enabled",
47-
"if set, an event is logged for every CREATE STATISTICS job",
47+
"if set, an event is logged for every successful CREATE STATISTICS job",
4848
false,
4949
settings.WithPublic)
5050

@@ -854,6 +854,8 @@ func (r *createStatsResumer) Resume(ctx context.Context, execCtx interface{}) er
854854
return nil
855855
}
856856

857+
// Note that we'll log an event even if the stats collection didn't occur
858+
// due to a benign error.
857859
// TODO(rytaft): This creates a new transaction for the CREATE STATISTICS
858860
// event. It must be different from the CREATE STATISTICS transaction,
859861
// because that transaction must be read-only. In the future we may want

pkg/sql/distsql_physical_planner.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4060,6 +4060,10 @@ func (dsp *DistSQLPlanner) createPhysPlanForPlanNode(
40604060
// Partial stats collections scan a different index for each column.
40614061
numIndexes = len(details.ColumnStats)
40624062
}
4063+
// If the error has pgcode.ObjectNotInPrerequisiteState, we don't
4064+
// swallow it here and propagate it to the client (since we're
4065+
// running EXPLAIN or EXPLAIN ANALYZE, the caller would be
4066+
// interested to know about all errors, benign included).
40634067
plan, err = dsp.createPlanForCreateStats(
40644068
ctx, planCtx, planCtx.planner.SemaCtx(), 0 /* jobID */, details,
40654069
numIndexes, 0, /* curIndex */

pkg/sql/distsql_plan_stats.go

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"github.com/cockroachdb/cockroach/pkg/sql/catalog/schemaexpr"
2222
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
2323
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
24-
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
2524
"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
2625
"github.com/cockroachdb/cockroach/pkg/sql/parser"
2726
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
@@ -349,10 +348,9 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
349348
)
350349

351350
var stat *stats.TableStatistic
352-
var histogram []cat.HistogramBucket
353-
// Find the statistic and histogram from the newest table statistic for our
354-
// column that is not partial and not forecasted. The first one we find will
355-
// be the latest due to the newest to oldest ordering property of the cache.
351+
// Find the statistic from the newest table statistic for our column that is
352+
// not partial and not forecasted. The first one we find will be the latest
353+
// due to the newest to oldest ordering property of the cache.
356354
for _, t := range tableStats {
357355
if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] &&
358356
!t.IsPartial() && !t.IsMerged() && !t.IsForecast() {
@@ -372,7 +370,6 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
372370
)
373371
}
374372
stat = t
375-
histogram = t.Histogram
376373
break
377374
}
378375
}
@@ -382,10 +379,18 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
382379
"column %s does not have a prior statistic",
383380
column.GetName())
384381
}
385-
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx, planCtx.EvalContext(), histogram)
382+
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx, planCtx.EvalContext(), stat.Histogram)
386383
if err != nil {
387384
return nil, err
388385
}
386+
if lowerBound == nil {
387+
return nil, pgerror.Newf(
388+
pgcode.ObjectNotInPrerequisiteState,
389+
"only outer or NULL bounded buckets exist in %s@%s (table ID %d, column IDs %v), "+
390+
"so partial stats cannot be collected",
391+
scan.desc.GetName(), scan.index.GetName(), stat.TableID, stat.ColumnIDs,
392+
)
393+
}
389394
extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound, upperBound, scan.index)
390395
if err != nil {
391396
return nil, err
@@ -724,6 +729,11 @@ func (dsp *DistSQLPlanner) createStatsPlan(
724729
numIndexes, curIndex), nil
725730
}
726731

732+
// createPlanForCreateStats creates the DistSQL plan to perform the table stats
733+
// collection according to CreateStatsDetails.
734+
//
735+
// If the returned error has pgcode.ObjectNotInPrerequisiteState, it might be
736+
// benign and the caller might want to swallow it, depending on the context.
727737
func (dsp *DistSQLPlanner) createPlanForCreateStats(
728738
ctx context.Context,
729739
planCtx *PlanningCtx,
@@ -781,6 +791,17 @@ func (dsp *DistSQLPlanner) planAndRunCreateStats(
781791

782792
physPlan, err := dsp.createPlanForCreateStats(ctx, planCtx, semaCtx, jobId, details, numIndexes, curIndex)
783793
if err != nil {
794+
if pgerror.GetPGCode(err) == pgcode.ObjectNotInPrerequisiteState {
795+
// This error is benign, so we'll swallow it. Concretely, this means
796+
// that we'll proceed with collecting partial stats if there are
797+
// more to do and the stats job overall will be marked as having
798+
// "succeeded". Even though this might seem confusing, it's a better
799+
// trade-off than having auto partial stats fail repeatedly due to
800+
// expected conditions (like a lower bound doesn't exist) raising
801+
// concerns for users. See #149279 for more discussion.
802+
log.Infof(ctx, "job %d: stats collection is swallowing benign error %v", jobId, err)
803+
return nil
804+
}
784805
return err
785806
}
786807

pkg/sql/logictest/testdata/logic_test/distsql_stats

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2702,10 +2702,10 @@ statement ok
27022702
CREATE TABLE xyz (x INT, y INT, z INT, INDEX (x, y));
27032703

27042704
statement error pq: column x does not have a prior statistic
2705-
CREATE STATISTICS xyz_x ON x FROM xyz USING EXTREMES;
2705+
EXPLAIN ANALYZE CREATE STATISTICS xyz_x ON x FROM xyz USING EXTREMES;
27062706

27072707
statement error pq: the latest full statistic for column a has no histogram
2708-
CREATE STATISTICS u_partial ON a FROM u USING EXTREMES;
2708+
EXPLAIN ANALYZE CREATE STATISTICS u_partial ON a FROM u USING EXTREMES;
27092709

27102710
statement error pq: table xy does not contain a non-partial forward index with y as a prefix column
27112711
CREATE STATISTICS xy_y_partial ON y FROM xy USING EXTREMES;
@@ -2724,8 +2724,8 @@ CREATE STATISTICS only_null_stat ON a FROM only_null;
27242724
statement ok
27252725
SELECT crdb_internal.clear_table_stats_cache();
27262726

2727-
statement error pq: only outer or NULL bounded buckets exist in the index, so partial stats cannot be collected
2728-
CREATE STATISTICS only_null_partial ON a FROM only_null USING EXTREMES;
2727+
statement error pq: only outer or NULL bounded buckets exist in only_null@only_null_a_idx \(table ID \d+, column IDs \[1\]\), so partial stats cannot be collected
2728+
EXPLAIN ANALYZE CREATE STATISTICS only_null_partial ON a FROM only_null USING EXTREMES;
27292729

27302730
statement ok
27312731
CREATE INDEX ON xy (y) WHERE y > 5;

pkg/sql/stats/bounds/BUILD.bazel

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ go_library(
1010
"//pkg/sql/catalog/catenumpb",
1111
"//pkg/sql/opt/cat",
1212
"//pkg/sql/opt/constraint",
13-
"//pkg/sql/pgwire/pgcode",
14-
"//pkg/sql/pgwire/pgerror",
1513
"//pkg/sql/sem/eval",
1614
"//pkg/sql/sem/tree",
1715
"//pkg/sql/sem/tree/treecmp",

pkg/sql/stats/bounds/extremes.go

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ import (
1212
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
1313
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
1414
"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
15-
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
16-
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
1715
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
1816
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
1917
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree/treecmp"
@@ -80,6 +78,9 @@ func ConstructUsingExtremesSpans(
8078

8179
// GetUsingExtremesBounds returns a tree.Datum representing the exclusive upper
8280
// and exclusive lower bounds of the USING EXTREMES span for partial statistics.
81+
//
82+
// lowerBound can be nil which is the case when the exclusive lower bound
83+
// doesn't exist.
8384
func GetUsingExtremesBounds(
8485
ctx context.Context, evalCtx *eval.Context, histogram []cat.HistogramBucket,
8586
) (lowerBound tree.Datum, upperBound tree.Datum, _ error) {
@@ -95,8 +96,8 @@ func GetUsingExtremesBounds(
9596
upperBound = histogram[len(histogram)-2].UpperBound
9697
}
9798

98-
// Pick the earliest lowerBound that is not null and isn't an outer bucket,
99-
// but if none exist, return error
99+
// Pick the earliest lowerBound that is not null and isn't an outer bucket.
100+
// We'll return nil if none exist which the caller is required to handle.
100101
for i := range histogram {
101102
hist := &histogram[i]
102103
if cmp, err := hist.UpperBound.Compare(ctx, evalCtx, tree.DNull); err != nil {
@@ -106,11 +107,5 @@ func GetUsingExtremesBounds(
106107
break
107108
}
108109
}
109-
if lowerBound == nil {
110-
return lowerBound, nil,
111-
pgerror.Newf(
112-
pgcode.ObjectNotInPrerequisiteState,
113-
"only outer or NULL bounded buckets exist in the index, so partial stats cannot be collected")
114-
}
115110
return lowerBound, upperBound, nil
116111
}

0 commit comments

Comments
 (0)