Skip to content

Commit 47465f9

Browse files
craig[bot]DrewKimball
andcommitted
Merge #156672
156672: sql/colfetcher: don't scan extra KVs beyond limit r=yuzefovich a=DrewKimball #### sql/colfetcher: don't scan extra KVs beyond limit When there are multiple column families in an index, the fetcher implementations determine a SQL row is finished by comparing against the next KV. As an optimization, `cfetcher` keeps track of the max column family ID in the table. When it encounters this family, it knows the current SQL row is finished. Since v22.1, `cfetcher` will fetch exactly the number of KVs needed in order to satisfy its hard limit in the first batch. The thinking was that we can avoid having to grab an extra KV for comparison because either: * One or more column families are elided due to NULL values, in which case we'll fetch more KVs than we need and have at least one for determining that the last row is done. * Or there are no elided entries, in which case we will see the max family ID for the last row and skip fetching an additional KV for comparison. This could result in secondary indexes fetching an additional batch with ~10k KVs, because secondary indexes may not contain the column family with the maximum ID, necessitating a fetch for an additional KV for comparison. This commit prevents the unnecessary fetch by plumbing the maximum family ID for the *index* being read, not just the table. Fixes #156613 Release note (bug fix): Fixed a bug that could cause a scan over a secondary index to read significantly more KVs than necessary in order to satisfy a limit when the scanned index has more than one column family. Co-authored-by: Drew Kimball <[email protected]>
2 parents 5551329 + 1cf4a91 commit 47465f9

File tree

8 files changed

+190
-10
lines changed

8 files changed

+190
-10
lines changed

pkg/sql/catalog/descriptor.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,10 @@ type TableDescriptor interface {
596596
// one k/v pair per family in the table, just like a primary index.
597597
IndexKeysPerRow(idx Index) int
598598

599+
// MaxFamilyIDForIndex returns the maximum column family ID used to encode a
600+
// row for the given index.
601+
MaxFamilyIDForIndex(idx Index) (descpb.FamilyID, error)
602+
599603
// IndexFetchSpecKeyAndSuffixColumns returns information about the key and
600604
// suffix columns, suitable for populating a fetchpb.IndexFetchSpec.
601605
IndexFetchSpecKeyAndSuffixColumns(idx Index) []fetchpb.IndexFetchSpec_KeyColumn

pkg/sql/catalog/fetchpb/index_fetch.proto

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ message IndexFetchSpec {
111111
// index ID. It is used to speed up the decoding process.
112112
optional uint32 key_prefix_length = 11 [(gogoproto.nullable) = false];
113113

114+
// Before v26.2, MaxFamilyID is the highest family ID in the table. In 26.2
115+
// and later, MaxFamilyID is the highest family ID that is included in the
116+
// index being scanned. It is used to quickly determine SQL row boundaries for
117+
// tables with multiple families, which otherwise requires comparing against a
118+
// key from the next SQL row, and can interfere with optimizations for row
119+
// limits.
114120
optional uint32 max_family_id = 12 [(gogoproto.nullable) = false,
115121
(gogoproto.customname) = "MaxFamilyID",
116122
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/sem/catid.FamilyID"];

pkg/sql/catalog/tabledesc/structured.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,34 @@ func (desc *wrapper) IndexKeysPerRow(idx catalog.Index) int {
122122
return numUsedFamilies
123123
}
124124

125+
// MaxFamilyIDForIndex implements the TableDescriptor interface.
126+
func (desc *wrapper) MaxFamilyIDForIndex(idx catalog.Index) (descpb.FamilyID, error) {
127+
if desc.PrimaryIndex.ID == idx.GetID() || idx.GetEncodingType() == catenumpb.PrimaryIndexEncoding {
128+
// The list of families is sorted by ID.
129+
return desc.Families[len(desc.Families)-1].ID, nil
130+
}
131+
if idx.NumSecondaryStoredColumns() == 0 || len(desc.Families) == 1 {
132+
// Secondary indexes always include the 0th family. If there are no stored
133+
// columns or no additional families, then the max family ID is that of the
134+
// 0th family.
135+
return desc.Families[0].ID, nil
136+
}
137+
// Calculate the max column family used by the secondary index. We
138+
// only need to look at the stored columns because column families are only
139+
// applicable to the value part of the KV. Iterate in reverse order, since
140+
// the list of families is sorted by ID, and we want the max family ID.
141+
storedColumnIDs := idx.CollectSecondaryStoredColumnIDs()
142+
for i := len(desc.Families) - 1; i >= 0; i-- {
143+
family := desc.Families[i]
144+
for _, columnID := range family.ColumnIDs {
145+
if storedColumnIDs.Contains(columnID) {
146+
return family.ID, nil
147+
}
148+
}
149+
}
150+
return 0, errors.AssertionFailedf("could not find max column family for index %d", idx.GetID())
151+
}
152+
125153
// BuildIndexName returns an index name that is not equal to any
126154
// of tableDesc's indexes, roughly following Postgres's conventions for naming
127155
// anonymous indexes. For example:

pkg/sql/colfetcher/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,15 @@ go_test(
8585
srcs = [
8686
"bytes_read_test.go",
8787
"main_test.go",
88+
"scan_limit_test.go",
8889
"vectorized_batch_size_test.go",
8990
],
9091
deps = [
9192
"//pkg/base",
9293
"//pkg/security/securityassets",
9394
"//pkg/security/securitytest",
9495
"//pkg/server",
96+
"//pkg/sql/sem/eval",
9597
"//pkg/testutils",
9698
"//pkg/testutils/serverutils",
9799
"//pkg/testutils/skip",

pkg/sql/colfetcher/cfetcher.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1440,8 +1440,12 @@ func (cf *cFetcher) finalizeBatch() {
14401440
// getCurrentColumnFamilyID returns the column family id of the key in
14411441
// cf.machine.nextKV.Key.
14421442
func (cf *cFetcher) getCurrentColumnFamilyID() (descpb.FamilyID, error) {
1443-
// If the table only has 1 column family, and its ID is 0, we know that the
1443+
// If the index only has 1 column family, and its ID is 0, we know that the
14441444
// key has to be the 0th column family.
1445+
// TODO(drewk): once we know all nodes are at least 26.2, we can perform this
1446+
// optimization whenever cf.table.spec.MaxKeysPerRow == 1, since at that point
1447+
// MaxFamilyID is always for the just index being scanned, not the whole
1448+
// table.
14451449
if cf.table.spec.MaxFamilyID == 0 {
14461450
return 0, nil
14471451
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package colfetcher_test
7+
8+
import (
9+
"context"
10+
"regexp"
11+
"testing"
12+
13+
"github.com/cockroachdb/cockroach/pkg/base"
14+
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
15+
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
16+
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
17+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
18+
"github.com/cockroachdb/cockroach/pkg/util/log"
19+
"github.com/stretchr/testify/require"
20+
)
21+
22+
// TestScanLimitKVPairsRead is a regression test for fetching an extra KV batch
23+
// beyond the limit when the index has more than one column family and doesn't
24+
// include the max column family. It verifies that the correct number of KV
25+
// pairs are read as reported in EXPLAIN ANALYZE output.
26+
func TestScanLimitKVPairsRead(t *testing.T) {
27+
defer leaktest.AfterTest(t)()
28+
defer log.Scope(t).Close(t)
29+
30+
ctx := context.Background()
31+
s, conn, _ := serverutils.StartServer(t, base.TestServerArgs{
32+
Knobs: base.TestingKnobs{SQLEvalContext: &eval.TestingKnobs{ForceProductionValues: true}},
33+
})
34+
defer s.Stopper().Stop(ctx)
35+
sqlDB := sqlutils.MakeSQLRunner(conn)
36+
37+
checkRows := func(rows [][]string) {
38+
// Look for "KV pairs read" in the output.
39+
kvPairsReadRE := regexp.MustCompile(`KV pairs read: ([\d,]+)`)
40+
var kvPairsCount string
41+
for _, row := range rows {
42+
if len(row) != 1 {
43+
t.Fatalf("expected one column in EXPLAIN ANALYZE output")
44+
}
45+
if matches := kvPairsReadRE.FindStringSubmatch(row[0]); len(matches) > 0 {
46+
kvPairsCount = matches[1]
47+
break
48+
}
49+
}
50+
require.Equal(t, "15", kvPairsCount,
51+
"expected exactly 15 KV pairs to be read for LIMIT 5 with 3 column families",
52+
)
53+
}
54+
55+
sqlDB.Exec(t, `
56+
CREATE TABLE t_multi_cf (
57+
k INT, a INT, b INT, c INT,
58+
INDEX (k) STORING (a, b),
59+
FAMILY (k), FAMILY (a), FAMILY (b), FAMILY (c)
60+
)`)
61+
62+
// Case with no NULL values.
63+
sqlDB.Exec(t, `INSERT INTO t_multi_cf (SELECT t, t%19, t*7, t//3 FROM generate_series(1, 100) g(t))`)
64+
rows := sqlDB.QueryStr(t, `EXPLAIN ANALYZE SELECT a, b FROM t_multi_cf LIMIT 5`)
65+
checkRows(rows)
66+
67+
// Case with some NULL values in the stored columns.
68+
sqlDB.Exec(t, `UPDATE t_multi_cf SET a = NULL WHERE k % 10 = 0`)
69+
rows = sqlDB.QueryStr(t, `EXPLAIN ANALYZE SELECT a, b FROM t_multi_cf LIMIT 5`)
70+
checkRows(rows)
71+
}

pkg/sql/rowenc/index_fetch.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,10 @@ func InitIndexFetchSpec(
6363

6464
s.FamilyDefaultColumns = table.FamilyDefaultColumns()
6565

66-
families := table.GetFamilies()
67-
for i := range families {
68-
if id := families[i].ID; id > s.MaxFamilyID {
69-
s.MaxFamilyID = id
70-
}
66+
var err error
67+
s.MaxFamilyID, err = table.MaxFamilyIDForIndex(index)
68+
if err != nil {
69+
return err
7170
}
7271

7372
s.KeyAndSuffixColumns = table.IndexFetchSpecKeyAndSuffixColumns(index)

pkg/sql/rowenc/testdata/index-fetch

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ CREATE TABLE fam (
407407

408408
INDEX b(b),
409409
INDEX b2(b) STORING (d),
410+
INDEX b3(b) STORING (c),
410411
INDEX c(c),
411412
INDEX c2(c) STORING (d),
412413

@@ -491,7 +492,7 @@ columns:
491492
"num_key_suffix_columns": 1,
492493
"max_keys_per_row": 1,
493494
"key_prefix_length": 2,
494-
"max_family_id": 2,
495+
"max_family_id": 0,
495496
"family_default_columns": [
496497
{
497498
"family_id": 0,
@@ -601,6 +602,71 @@ columns:
601602
]
602603
}
603604

605+
# Index b3 spans two families, but doesn't include the max column family ID.
606+
index-fetch
607+
table: fam
608+
index: b3
609+
columns:
610+
- a
611+
----
612+
{
613+
"version": 1,
614+
"table_id": 107,
615+
"table_name": "fam",
616+
"index_id": 4,
617+
"index_name": "b3",
618+
"is_secondary_index": true,
619+
"is_unique_index": false,
620+
"geo_config": {},
621+
"encoding_type": 0,
622+
"num_key_suffix_columns": 1,
623+
"max_keys_per_row": 2,
624+
"key_prefix_length": 2,
625+
"max_family_id": 1,
626+
"family_default_columns": [
627+
{
628+
"family_id": 0,
629+
"default_column_id": 2
630+
},
631+
{
632+
"family_id": 1,
633+
"default_column_id": 3
634+
}
635+
],
636+
"key_and_suffix_columns": [
637+
{
638+
"column": {
639+
"column_id": 2,
640+
"name": "b",
641+
"type": "family: StringFamily\nwidth: 0\nprecision: 0\nlocale: \"\"\nvisible_type: 0\noid: 25\ntime_precision_is_set: false\n",
642+
"is_non_nullable": false
643+
},
644+
"direction": 0,
645+
"is_composite": false,
646+
"is_inverted": false
647+
},
648+
{
649+
"column": {
650+
"column_id": 1,
651+
"name": "a",
652+
"type": "family: IntFamily\nwidth: 64\nprecision: 0\nlocale: \"\"\nvisible_type: 3\noid: 20\ntime_precision_is_set: false\n",
653+
"is_non_nullable": true
654+
},
655+
"direction": 0,
656+
"is_composite": false,
657+
"is_inverted": false
658+
}
659+
],
660+
"fetched_columns": [
661+
{
662+
"column_id": 1,
663+
"name": "a",
664+
"type": "family: IntFamily\nwidth: 64\nprecision: 0\nlocale: \"\"\nvisible_type: 3\noid: 20\ntime_precision_is_set: false\n",
665+
"is_non_nullable": true
666+
}
667+
]
668+
}
669+
604670
# Index c has one key per row.
605671
index-fetch
606672
table: fam
@@ -612,7 +678,7 @@ columns:
612678
"version": 1,
613679
"table_id": 107,
614680
"table_name": "fam",
615-
"index_id": 4,
681+
"index_id": 5,
616682
"index_name": "c",
617683
"is_secondary_index": true,
618684
"is_unique_index": false,
@@ -621,7 +687,7 @@ columns:
621687
"num_key_suffix_columns": 1,
622688
"max_keys_per_row": 1,
623689
"key_prefix_length": 2,
624-
"max_family_id": 2,
690+
"max_family_id": 0,
625691
"family_default_columns": [
626692
{
627693
"family_id": 0,
@@ -677,7 +743,7 @@ columns:
677743
"version": 1,
678744
"table_id": 107,
679745
"table_name": "fam",
680-
"index_id": 5,
746+
"index_id": 6,
681747
"index_name": "c2",
682748
"is_secondary_index": true,
683749
"is_unique_index": false,

0 commit comments

Comments
 (0)