Skip to content

Commit 1901430

Browse files
authored
Merge pull request #154755 from cockroachdb/blathers/backport-release-25.4-153419
release-25.4: sql/stats: merge all arbitrary partial and full table statistics
2 parents f25564f + 8db8846 commit 1901430

File tree

17 files changed

+1468
-515
lines changed

17 files changed

+1468
-515
lines changed

pkg/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2351,6 +2351,7 @@ GO_TARGETS = [
23512351
"//pkg/sql/sem/plpgsqltree:plpgsqltree",
23522352
"//pkg/sql/sem/semenumpb:semenumpb",
23532353
"//pkg/sql/sem/transform:transform",
2354+
"//pkg/sql/sem/tree/datumrange:datumrange",
23542355
"//pkg/sql/sem/tree/evalgen:evalgen",
23552356
"//pkg/sql/sem/tree/evalgen:evalgen_lib",
23562357
"//pkg/sql/sem/tree/treebin:treebin",

pkg/sql/distsql_plan_stats.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
304304
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "multi-column partial statistics are not currently supported")
305305
}
306306

307+
if !reqStat.histogram {
308+
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "partial statistics without histograms are not supported")
309+
}
310+
307311
var typeResolver *descs.DistSQLTypeResolver
308312
if p := planCtx.planner; p != nil {
309313
r := descs.NewDistSQLTypeResolver(p.Descriptors(), p.Txn())
@@ -378,6 +382,13 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
378382
"column %s does not have a prior statistic",
379383
column.GetName())
380384
}
385+
if len(stat.Histogram) == 1 && stat.Histogram[0].UpperBound == tree.DNull {
386+
return nil, pgerror.Newf(
387+
pgcode.ObjectNotInPrerequisiteState,
388+
"the latest full statistic histogram for column %s has only NULL values",
389+
column.GetName(),
390+
)
391+
}
381392

382393
var predicate string
383394
var prevLowerBound tree.Datum

pkg/sql/logictest/testdata/logic_test/constrained_stats

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -64,31 +64,6 @@ upper_bound range_rows distinct_range_rows equal_rows
6464
50.00 0 0 1
6565
55.00 0 0 1
6666

67-
statement ok
68-
CREATE STATISTICS stat_price_ranges ON price FROM products WHERE price <= 20.00 OR price > 40.00
69-
70-
query TTIIIT colnames
71-
SELECT statistics_name, column_names, row_count, distinct_count, null_count, partial_predicate
72-
FROM [SHOW STATISTICS FOR TABLE products]
73-
WHERE statistics_name = 'stat_price_ranges'
74-
----
75-
statistics_name column_names row_count distinct_count null_count partial_predicate
76-
stat_price_ranges {price} 6 6 0 (price <= 20.00) OR (price > 40.00)
77-
78-
let $hist_stat_price_ranges
79-
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE products] WHERE statistics_name = 'stat_price_ranges';
80-
81-
query TIRI colnames,nosort
82-
SHOW HISTOGRAM $hist_stat_price_ranges
83-
----
84-
upper_bound range_rows distinct_range_rows equal_rows
85-
10.00 0 0 1
86-
15.00 0 0 1
87-
20.00 0 0 1
88-
45.00 0 0 1
89-
50.00 0 0 1
90-
55.00 0 0 1
91-
9267
statement ok
9368
CREATE STATISTICS stat_active_products ON status FROM products WHERE status = 'active'
9469

@@ -111,17 +86,6 @@ WHERE statistics_name = 'stat_mid_range_products'
11186
statistics_name column_names row_count distinct_count null_count partial_predicate
11287
stat_mid_range_products {category_id} 8 2 0 category_id BETWEEN 1 AND 2
11388

114-
statement ok
115-
CREATE STATISTICS stat_specific_categories ON category_id FROM products WHERE category_id IN (1, 3)
116-
117-
query TTIIIT colnames
118-
SELECT statistics_name, column_names, row_count, distinct_count, null_count, partial_predicate
119-
FROM [SHOW STATISTICS FOR TABLE products]
120-
WHERE statistics_name = 'stat_specific_categories'
121-
----
122-
statistics_name column_names row_count distinct_count null_count partial_predicate
123-
stat_specific_categories {category_id} 6 2 0 category_id IN (1, 3)
124-
12589
# Verify error cases
12690
statement error pq: partial statistics with WHERE must be on a single column
12791
CREATE STATISTICS stat_multi_col ON category_id, price FROM products WHERE status = 'active'
@@ -147,6 +111,12 @@ CREATE STATISTICS stat_subquery ON category_id FROM products WHERE category_id I
147111
statement error pq: predicate could not become a constrained scan of an index
148112
CREATE STATISTICS stat_func ON status FROM products WHERE length(status) > 6
149113

114+
statement error pq: unimplemented: CREATE STATISTICS with a WHERE clause that produces multiple spans is not yet supported
115+
CREATE STATISTICS stat_func ON price FROM products WHERE price < 20.00 OR price > 30.00
116+
117+
statement error pq: unimplemented: CREATE STATISTICS with a WHERE clause that produces multiple spans is not yet supported
118+
CREATE STATISTICS stat_specific_categories ON category_id FROM products WHERE category_id IN (1, 3)
119+
150120
statement error syntax error
151121
CREATE STATISTICS stat_error2 ON price FROM products WHERE price >
152122

pkg/sql/logictest/testdata/logic_test/distsql_stats

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2675,6 +2675,18 @@ RESET enable_create_stats_using_extremes
26752675
statement error pq: multi-column partial statistics are not currently supported
26762676
CREATE STATISTICS abcd_a_b ON a, c FROM abcd USING EXTREMES;
26772677

2678+
statement ok
2679+
SET CLUSTER SETTING sql.stats.histogram_collection.enabled = false
2680+
2681+
statement error pq: partial statistics without histograms are not supported
2682+
CREATE STATISTICS abcd_a_b ON a FROM abcd USING EXTREMES;
2683+
2684+
statement error pq: partial statistics without histograms are not supported
2685+
CREATE STATISTICS abcd_a_b ON a FROM abcd WHERE a > 5;
2686+
2687+
statement ok
2688+
RESET CLUSTER SETTING sql.stats.histogram_collection.enabled
2689+
26782690
# Verify that a non-inverted index string column with a string histogram
26792691
# can have partial statistics
26802692
statement ok
@@ -2730,9 +2742,15 @@ CREATE TABLE xyz (x INT, y INT, z INT, INDEX (x, y));
27302742
statement error pq: column x does not have a prior statistic
27312743
EXPLAIN ANALYZE CREATE STATISTICS xyz_x ON x FROM xyz USING EXTREMES;
27322744

2745+
statement error pq: column x does not have a prior statistic
2746+
EXPLAIN ANALYZE CREATE STATISTICS xyz_x ON x FROM xyz WHERE x > 5;
2747+
27332748
statement error pq: the latest full statistic for column a has no histogram
27342749
EXPLAIN ANALYZE CREATE STATISTICS u_partial ON a FROM u USING EXTREMES;
27352750

2751+
statement error pq: the latest full statistic for column a has no histogram
2752+
EXPLAIN ANALYZE CREATE STATISTICS u_partial ON a FROM u WHERE a > 5;
2753+
27362754
statement error pq: table xy does not contain a non-partial forward index with y as a prefix column
27372755
CREATE STATISTICS xy_y_partial ON y FROM xy USING EXTREMES;
27382756

@@ -2750,9 +2768,12 @@ CREATE STATISTICS only_null_stat ON a FROM only_null;
27502768
statement ok
27512769
SELECT crdb_internal.clear_table_stats_cache();
27522770

2753-
statement error pq: only outer or NULL bounded buckets exist in only_null@only_null_a_idx \(table ID \d+, column IDs \[1\]\), so partial stats cannot be collected
2771+
statement error pq: the latest full statistic histogram for column a has only NULL values
27542772
EXPLAIN ANALYZE CREATE STATISTICS only_null_partial ON a FROM only_null USING EXTREMES;
27552773

2774+
statement error pq: the latest full statistic histogram for column a has only NULL values
2775+
EXPLAIN ANALYZE CREATE STATISTICS only_null_partial ON a FROM only_null WHERE a > 5;
2776+
27562777
statement ok
27572778
CREATE INDEX ON xy (y) WHERE y > 5;
27582779

pkg/sql/opt/cat/table.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,8 @@ type TableStatistic interface {
271271
// inverted index histograms, this will always return types.Bytes.
272272
HistogramType() *types.T
273273

274-
// IsPartial returns true if this statistic was collected with a where
275-
// clause. (If the where clause was something like "WHERE 1 = 1" or "WHERE
276-
// true" this could technically be a full statistic rather than a partial
277-
// statistic, but this function does not check for this.)
274+
// IsPartial returns true if this statistic was collected with USING EXTREMES
275+
// or with a WHERE clause.
278276
IsPartial() bool
279277

280278
// IsMerged returns true if this statistic was created by merging a partial

pkg/sql/opt/exec/execbuilder/testdata/partial_stats

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,3 +1478,125 @@ vectorized: true
14781478
spans: [/10 - /10]
14791479

14801480
subtest end
1481+
1482+
# Ensure that constrained partial statistics are merged into full statistics
1483+
# and used by the optimizer.
1484+
1485+
statement ok
1486+
CREATE TABLE m (b INT PRIMARY KEY)
1487+
1488+
statement ok
1489+
ALTER TABLE m INJECT STATISTICS '[
1490+
{
1491+
"id": 1,
1492+
"avg_size": 1,
1493+
"columns": ["b"],
1494+
"created_at": "2000-01-01 00:00:00.000000",
1495+
"distinct_count": 4,
1496+
"histo_buckets": [
1497+
{"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "10"},
1498+
{"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "20"},
1499+
{"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "30"},
1500+
{"distinct_range": 0, "num_eq": 1, "num_range": 0, "upper_bound": "40"}
1501+
],
1502+
"histo_col_type": "INT8",
1503+
"histo_version": 2,
1504+
"name": "full",
1505+
"null_count": 0,
1506+
"row_count": 4
1507+
},
1508+
{
1509+
"id": 2,
1510+
"avg_size": 1,
1511+
"columns": ["b"],
1512+
"created_at": "2000-01-02 00:00:00.000000",
1513+
"distinct_count": 4,
1514+
"histo_buckets": [
1515+
{"distinct_range": 0, "num_eq": 3, "num_range": 0, "upper_bound": "20"},
1516+
{"distinct_range": 2, "num_eq": 2, "num_range": 3, "upper_bound": "30"}
1517+
],
1518+
"histo_col_type": "INT8",
1519+
"histo_version": 2,
1520+
"name": "partial",
1521+
"null_count": 0,
1522+
"partial_predicate": "((b > 15:::INT8) AND (b < 35:::INT8))",
1523+
"row_count": 8,
1524+
"full_statistic_id": 1
1525+
}
1526+
]'
1527+
1528+
query TTTIIII colnames
1529+
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
1530+
FROM [SHOW STATISTICS FOR TABLE m WITH MERGE]
1531+
ORDER BY created
1532+
----
1533+
statistics_name column_names created row_count distinct_count null_count avg_size
1534+
full {b} 2000-01-01 00:00:00 +0000 UTC 4 4 0 1
1535+
partial {b} 2000-01-02 00:00:00 +0000 UTC 8 4 0 1
1536+
__merged__ {b} 2000-01-02 00:00:00 +0000 UTC 10 6 0 1
1537+
1538+
query T
1539+
SELECT jsonb_pretty(stat)
1540+
FROM (
1541+
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE m WITH MERGE]
1542+
)
1543+
WHERE stat->>'name' = '__merged__';
1544+
----
1545+
{
1546+
"avg_size": 1,
1547+
"columns": [
1548+
"b"
1549+
],
1550+
"created_at": "2000-01-02 00:00:00",
1551+
"distinct_count": 6,
1552+
"histo_buckets": [
1553+
{
1554+
"distinct_range": 0,
1555+
"num_eq": 1,
1556+
"num_range": 0,
1557+
"upper_bound": "10"
1558+
},
1559+
{
1560+
"distinct_range": 0,
1561+
"num_eq": 3,
1562+
"num_range": 0,
1563+
"upper_bound": "20"
1564+
},
1565+
{
1566+
"distinct_range": 2,
1567+
"num_eq": 2,
1568+
"num_range": 3,
1569+
"upper_bound": "30"
1570+
},
1571+
{
1572+
"distinct_range": 0,
1573+
"num_eq": 1,
1574+
"num_range": 0,
1575+
"upper_bound": "40"
1576+
}
1577+
],
1578+
"histo_col_type": "INT8",
1579+
"histo_version": 3,
1580+
"name": "__merged__",
1581+
"null_count": 0,
1582+
"row_count": 10
1583+
}
1584+
1585+
# Now clear the stats cache so that the query below is guaranteed to pick up the
1586+
# new stats (partial and merged).
1587+
statement ok
1588+
SELECT crdb_internal.clear_table_stats_cache();
1589+
1590+
query T
1591+
EXPLAIN (OPT, VERBOSE) SELECT * FROM m WHERE b < 35
1592+
----
1593+
scan m
1594+
├── columns: b:1
1595+
├── constraint: /1: [ - /34]
1596+
├── stats: [rows=9, distinct(1)=5, null(1)=0]
1597+
│ histogram(1)= 0 1 0 3 3 2 0 0
1598+
│ <--- 10 --- 20 --- 30 --- 34
1599+
├── cost: 27.11
1600+
├── cost-flags: unbounded-cardinality
1601+
├── key: (1)
1602+
└── distribution: test

pkg/sql/opt/optbuilder/misc_statements.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,5 +323,10 @@ func (b *Builder) buildWhereForStatistics(
323323
"predicate is a contradiction"))
324324
}
325325

326+
if cons.Spans.Count() > 1 {
327+
panic(unimplemented.NewWithIssue(154040,
328+
"CREATE STATISTICS with a WHERE clause that produces multiple spans is not yet supported"))
329+
}
330+
326331
return indexOrd, &cons
327332
}

pkg/sql/opt/props/BUILD.bazel

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,12 @@ go_library(
2323
"//pkg/sql/opt",
2424
"//pkg/sql/opt/cat",
2525
"//pkg/sql/opt/constraint",
26-
"//pkg/sql/rowenc/keyside",
2726
"//pkg/sql/sem/eval",
2827
"//pkg/sql/sem/tree",
28+
"//pkg/sql/sem/tree/datumrange",
2929
"//pkg/sql/sem/volatility",
3030
"//pkg/sql/types",
3131
"//pkg/util/buildutil",
32-
"//pkg/util/encoding",
33-
"//pkg/util/timetz",
3432
"@com_github_cockroachdb_errors//:errors",
3533
"@com_github_cockroachdb_redact//:redact",
3634
"@com_github_olekukonko_tablewriter//:tablewriter",

0 commit comments

Comments
 (0)