Skip to content

Commit 84315c8

Browse files
committed
sql/stats: support collecting partial statistics on arbitrary constraints
This commit adds support for manually creating partial statistics with a `WHERE` clause, where the predicate fully constrains the first column of an index. Additionally, the predicate must reference a single outer column, which must be the same as the single column that stats are being collected on. These stats are stored in `system.table_statistics` with their predicate.

Part of: #93998

Release note (sql change): Users can now manually create single-column partial statistics on boolean predicate expressions that can become simple index scans. These statistics can be created by adding a constraining `WHERE` expression to `CREATE STATISTICS`. For example:

```
CREATE TABLE t (a INT PRIMARY KEY);
INSERT INTO t VALUES (1), (2), (3), (4), (5);
CREATE STATISTICS constrained_stat ON a FROM t WHERE a > 2;
```
1 parent 7073d3e commit 84315c8

File tree

18 files changed

+618
-51
lines changed

18 files changed

+618
-51
lines changed

pkg/jobs/jobspb/jobs.proto

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,21 @@ message CreateStatsDetails {
12641264

12651265
// If true, will collect partial table statistics at extreme values.
12661266
bool using_extremes = 9;
1267+
1268+
// WHERE clause for partial statistics collection. This field is only used
1269+
// to populate the predicate in the system.table_statistics table and to
1270+
// determine if this is a constrained stats collection. The actual constrained
1271+
// scan is done over the spans in the where_spans field.
1272+
string where_clause = 10;
1273+
1274+
// Spans over which to collect partial statistics with a WHERE clause.
1275+
repeated roachpb.Span where_spans = 11 [(gogoproto.nullable) = false];
1276+
1277+
// The ID of the index used to collect partial statistics with a WHERE clause.
1278+
int64 where_index_id = 12 [
1279+
(gogoproto.customname) = "WhereIndexID",
1280+
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb.IndexID"
1281+
];
12671282
}
12681283

12691284
message CreateStatsProgress {

pkg/sql/create_stats.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/cockroachdb/cockroach/pkg/featureflag"
1313
"github.com/cockroachdb/cockroach/pkg/jobs"
1414
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
15+
"github.com/cockroachdb/cockroach/pkg/roachpb"
1516
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
1617
"github.com/cockroachdb/cockroach/pkg/settings"
1718
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
@@ -139,6 +140,12 @@ type createStatsNode struct {
139140
// If it is false, the flow for create statistics is planned directly; this
140141
// is used when the statement is under EXPLAIN or EXPLAIN ANALYZE.
141142
runAsJob bool
143+
144+
// whereSpans are the spans corresponding to the WHERE clause, if any.
145+
whereSpans roachpb.Spans
146+
147+
// whereIndexID is the index to use to collect statistics with a WHERE clause.
148+
whereIndexID descpb.IndexID
142149
}
143150

144151
func (n *createStatsNode) startExec(params runParams) error {
@@ -280,11 +287,15 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
280287
return nil, errors.Errorf(`creating partial statistics at extremes is disabled`)
281288
}
282289

283-
// TODO(93998): Add support for WHERE.
290+
var whereClause string
284291
if n.Options.Where != nil {
285-
return nil, pgerror.New(pgcode.FeatureNotSupported,
286-
"creating partial statistics with a WHERE clause is not yet supported",
287-
)
292+
if n.whereSpans == nil {
293+
return nil, errors.AssertionFailedf(
294+
"expected whereSpans to be set for statistics with a WHERE clause")
295+
}
296+
// Safe to use AsString since whereClause is only used to populate the
297+
// predicate in system.table_statistics.
298+
whereClause = tree.AsString(n.Options.Where.Expr)
288299
}
289300

290301
if err := n.p.CheckPrivilege(ctx, tableDesc, privilege.SELECT); err != nil {
@@ -409,6 +420,9 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
409420
MaxFractionIdle: n.Options.Throttling,
410421
DeleteOtherStats: deleteOtherStats,
411422
UsingExtremes: n.Options.UsingExtremes,
423+
WhereClause: whereClause,
424+
WhereSpans: n.whereSpans,
425+
WhereIndexID: n.whereIndexID,
412426
},
413427
Progress: jobspb.CreateStatsProgress{},
414428
}, nil

pkg/sql/distsql_plan_stats.go

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,11 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
329329
// Initialize a dummy scanNode for the requested statistic.
330330
var scan scanNode
331331
scan.desc = desc
332-
err = scan.initDescSpecificCol(colCfg, column)
332+
if details.UsingExtremes {
333+
err = scan.initDescSpecificCol(colCfg, column)
334+
} else if details.WhereClause != "" {
335+
err = scan.initDescSpecificIndex(colCfg, column, details.WhereIndexID)
336+
}
333337
if err != nil {
334338
return nil, err
335339
}
@@ -342,11 +346,6 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
342346
colIdxMap.Set(c.GetID(), i)
343347
}
344348

345-
var sb span.Builder
346-
sb.InitAllowingExternalRowData(
347-
planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index,
348-
)
349-
350349
var stat *stats.TableStatistic
351350
// Find the statistic from the newest table statistic for our column that is
352351
// not partial and not forecasted. The first one we find will be the latest
@@ -379,27 +378,47 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
379378
"column %s does not have a prior statistic",
380379
column.GetName())
381380
}
382-
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx, planCtx.EvalContext(), stat.Histogram)
383-
if err != nil {
384-
return nil, err
385-
}
386-
if lowerBound == nil {
387-
return nil, pgerror.Newf(
388-
pgcode.ObjectNotInPrerequisiteState,
389-
"only outer or NULL bounded buckets exist in %s@%s (table ID %d, column IDs %v), "+
390-
"so partial stats cannot be collected",
391-
scan.desc.GetName(), scan.index.GetName(), stat.TableID, stat.ColumnIDs,
381+
382+
var predicate string
383+
var prevLowerBound tree.Datum
384+
if details.UsingExtremes {
385+
var sb span.Builder
386+
sb.InitAllowingExternalRowData(
387+
planCtx.EvalContext(), planCtx.ExtendedEvalCtx.Codec, desc, scan.index,
392388
)
393-
}
394-
extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound, upperBound, scan.index)
395-
if err != nil {
396-
return nil, err
397-
}
398-
extremesPredicate := bounds.ConstructUsingExtremesPredicate(lowerBound, upperBound, column.GetName())
399-
// Get roachpb.Spans from constraint.Spans
400-
scan.spans, err = sb.SpansFromConstraintSpan(&extremesSpans, span.NoopSplitter())
401-
if err != nil {
402-
return nil, err
389+
390+
lowerBound, upperBound, err := bounds.GetUsingExtremesBounds(ctx,
391+
planCtx.EvalContext(), stat.Histogram)
392+
if err != nil {
393+
return nil, err
394+
}
395+
if lowerBound == nil {
396+
return nil, pgerror.Newf(
397+
pgcode.ObjectNotInPrerequisiteState,
398+
"only outer or NULL bounded buckets exist in %s@%s (table ID %d, column IDs %v), "+
399+
"so partial stats cannot be collected",
400+
scan.desc.GetName(), scan.index.GetName(), stat.TableID, stat.ColumnIDs,
401+
)
402+
}
403+
prevLowerBound = lowerBound
404+
405+
extremesSpans, err := bounds.ConstructUsingExtremesSpans(lowerBound,
406+
upperBound, scan.index)
407+
if err != nil {
408+
return nil, err
409+
}
410+
predicate = bounds.ConstructUsingExtremesPredicate(lowerBound, upperBound, column.GetName())
411+
// Get roachpb.Spans from constraint.Spans
412+
scan.spans, err = sb.SpansFromConstraintSpan(&extremesSpans, span.NoopSplitter())
413+
if err != nil {
414+
return nil, err
415+
}
416+
} else if details.WhereClause != "" {
417+
predicate = details.WhereClause
418+
scan.spans = details.WhereSpans
419+
} else {
420+
return nil, errors.AssertionFailedf(
421+
"partial stats require either USING EXTREMES or a WHERE clause")
403422
}
404423
p, err := dsp.createTableReaders(ctx, planCtx, &scan)
405424
if err != nil {
@@ -419,10 +438,13 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
419438
HistogramMaxBuckets: reqStat.histogramMaxBuckets,
420439
Columns: make([]uint32, len(reqStat.columns)),
421440
StatName: reqStat.name,
422-
PartialPredicate: extremesPredicate,
423-
FullStatisticID: stat.StatisticID,
424-
PrevLowerBound: tree.Serialize(lowerBound),
441+
PartialPredicate: predicate,
442+
}
443+
if details.UsingExtremes && prevLowerBound != nil {
444+
spec.PrevLowerBound = tree.Serialize(prevLowerBound)
445+
spec.FullStatisticID = stat.StatisticID
425446
}
447+
426448
// For now, this loop should iterate only once, as we only
427449
// handle single-column partial statistics.
428450
// TODO(faizaanmadhani): Add support for multi-column partial stats next
@@ -767,7 +789,7 @@ func (dsp *DistSQLPlanner) createPlanForCreateStats(
767789
return nil, errors.New("no stats requested")
768790
}
769791

770-
if details.UsingExtremes {
792+
if details.UsingExtremes || details.WhereClause != "" {
771793
return dsp.createPartialStatsPlan(ctx, planCtx, tableDesc, reqStats, jobID, details, numIndexes, curIndex)
772794
}
773795
return dsp.createStatsPlan(ctx, planCtx, semaCtx, tableDesc, reqStats, jobID, details, numIndexes, curIndex)

pkg/sql/distsql_spec_exec_factory.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1707,7 +1707,7 @@ func (e *distSQLSpecExecFactory) ConstructCancelSessions(
17071707
}
17081708

17091709
func (e *distSQLSpecExecFactory) ConstructCreateStatistics(
1710-
cs *tree.CreateStats,
1710+
cs *tree.CreateStats, table cat.Table, index cat.Index, whereConstraint *constraint.Constraint,
17111711
) (exec.Node, error) {
17121712
return nil, unimplemented.NewWithIssue(47473, "experimental opt-driven distsql planning: create statistics")
17131713
}

pkg/sql/execinfrapb/processors_table_stats.proto

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ message SketchSpec {
4343
// and is the empty string for a full table statistic.
4444
optional string partial_predicate = 7 [(gogoproto.nullable) = false];
4545

46-
// FullStatisticID is non-zero for partial statistics and 0 for full
47-
// statistics. It is the statistic id of the full statistic that this partial
48-
// statistic was derived from.
46+
// FullStatisticID is non-zero for partial statistics USING EXTREMES and 0
47+
// otherwise. It is the statistic id of the full statistic that this partial
48+
// statistic on extreme values was derived from.
4949
optional uint64 full_statistic_id = 8 [(gogoproto.customname) = "FullStatisticID", (gogoproto.nullable) = false];
5050

5151
// PrevLowerBound is a tree.Datum serialized into a string representing

0 commit comments

Comments
 (0)