Skip to content

Commit c40ddee

Browse files
craig[bot]Uzair5162pav-kv
committed
152469: sql/stats: support collecting partial statistics on arbitrary constraints r=Uzair5162 a=Uzair5162 This commit adds support to manually create partial statistics with a `WHERE` clause, where the predicate fully constrains the first column of an index. Additionally, the predicate must reference a single outer column, which must be the same as the single column that stats are being collected on. These stats are stored in `system.table_statistics` with their predicate. Part of: #93998 Release note (sql change): Users can now manually create single-column partial statistics on boolean predicate expressions that can become simple index scans. These statistics can be created by adding a constraining `WHERE` expression to `CREATE STATISTICS`. For example: ``` CREATE TABLE t (a INT PRIMARY KEY); INSERT INTO t VALUES (1), (2), (3), (4), (5); CREATE STATISTICS constrained_stat ON a FROM t WHERE a > 2; ``` 153344: kvstorage: add echotest for DestroyReplica r=arulajmani a=pav-kv This PR introduces a basic echotest for `DestroyReplica` storage operation. To be extended with a similar test for replica destruction with separated engines. Part of #152845 Co-authored-by: Uzair Ahmad <[email protected]> Co-authored-by: Pavel Kalinnikov <[email protected]>
3 parents 232b919 + 84315c8 + f41e8ef commit c40ddee

28 files changed

+811
-52
lines changed

pkg/jobs/jobspb/jobs.proto

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1264,6 +1264,21 @@ message CreateStatsDetails {
12641264

12651265
// If true, will collect partial table statistics at extreme values.
12661266
bool using_extremes = 9;
1267+
1268+
// WHERE clause for partial statistics collection. This field is only used
1269+
// to populate the predicate in the system.table_statistics table and to
1270+
// determine if this is a constrained stats collection. The actual constrained
1271+
// scan is done over the spans in the where_spans field.
1272+
string where_clause = 10;
1273+
1274+
// Spans over which to collect partial statistics with a WHERE clause.
1275+
repeated roachpb.Span where_spans = 11 [(gogoproto.nullable) = false];
1276+
1277+
// The ID of the index used to collect partial statistics with a WHERE clause.
1278+
int64 where_index_id = 12 [
1279+
(gogoproto.customname) = "WhereIndexID",
1280+
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb.IndexID"
1281+
];
12671282
}
12681283

12691284
message CreateStatsProgress {

pkg/kv/kvserver/kvstorage/BUILD.bazel

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,34 @@ go_test(
3838
srcs = [
3939
"cluster_version_test.go",
4040
"datadriven_test.go",
41+
"destroy_test.go",
4142
],
4243
data = glob(["testdata/**"]),
4344
embed = [":kvstorage"],
4445
deps = [
4546
"//pkg/clusterversion",
4647
"//pkg/keys",
48+
"//pkg/kv/kvpb",
49+
"//pkg/kv/kvserver/concurrency/lock",
50+
"//pkg/kv/kvserver/kvserverpb",
51+
"//pkg/kv/kvserver/logstore",
52+
"//pkg/kv/kvserver/print",
4753
"//pkg/kv/kvserver/stateloader",
4854
"//pkg/raft/raftpb",
4955
"//pkg/roachpb",
5056
"//pkg/settings/cluster",
5157
"//pkg/storage",
5258
"//pkg/testutils",
5359
"//pkg/testutils/datapathutils",
60+
"//pkg/testutils/echotest",
5461
"//pkg/util/hlc",
5562
"//pkg/util/leaktest",
5663
"//pkg/util/log",
5764
"//pkg/util/stop",
5865
"//pkg/util/tracing",
5966
"//pkg/util/uuid",
6067
"@com_github_cockroachdb_datadriven//:datadriven",
68+
"@com_github_cockroachdb_redact//:redact",
6169
"@com_github_stretchr_testify//require",
6270
],
6371
)

pkg/kv/kvserver/kvstorage/datadriven_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package kvstorage
88
import (
99
"context"
1010
"fmt"
11+
"path/filepath"
1112
"regexp"
1213
"strings"
1314
"testing"
@@ -100,7 +101,8 @@ func TestDataDriven(t *testing.T) {
100101
// Scan stats (shown after loading the range descriptors) can be non-deterministic.
101102
reStripScanStats := regexp.MustCompile(`stats: .*$`)
102103

103-
datadriven.Walk(t, datapathutils.TestDataPath(t), func(t *testing.T, path string) {
104+
dir := filepath.Join(datapathutils.TestDataPath(t), t.Name())
105+
datadriven.Walk(t, dir, func(t *testing.T, path string) {
104106
e := newEnv(t)
105107
defer e.close()
106108
datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) (output string) {
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package kvstorage
7+
8+
import (
9+
"context"
10+
"fmt"
11+
"path/filepath"
12+
"strings"
13+
"testing"
14+
15+
"github.com/cockroachdb/cockroach/pkg/keys"
16+
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
17+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
18+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
19+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
20+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/print"
21+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
22+
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
23+
"github.com/cockroachdb/cockroach/pkg/roachpb"
24+
"github.com/cockroachdb/cockroach/pkg/storage"
25+
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
26+
"github.com/cockroachdb/cockroach/pkg/testutils/echotest"
27+
"github.com/cockroachdb/cockroach/pkg/util/hlc"
28+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
29+
"github.com/cockroachdb/cockroach/pkg/util/log"
30+
"github.com/cockroachdb/cockroach/pkg/util/uuid"
31+
"github.com/cockroachdb/redact"
32+
"github.com/stretchr/testify/require"
33+
)
34+
35+
func TestDestroyReplica(t *testing.T) {
36+
defer leaktest.AfterTest(t)()
37+
defer log.Scope(t).Close(t)
38+
39+
storage.DisableMetamorphicSimpleValueEncoding(t) // for deterministic output
40+
eng := storage.NewDefaultInMemForTesting()
41+
defer eng.Close()
42+
43+
var sb redact.StringBuilder
44+
ctx := context.Background()
45+
mutate := func(name string, write func(storage.ReadWriter)) {
46+
b := eng.NewBatch()
47+
defer b.Close()
48+
write(b)
49+
str, err := print.DecodeWriteBatch(b.Repr())
50+
require.NoError(t, err)
51+
_, err = sb.WriteString(fmt.Sprintf(">> %s:\n%s", name, str))
52+
require.NoError(t, err)
53+
require.NoError(t, b.Commit(false))
54+
}
55+
56+
r := replicaInfo{
57+
id: roachpb.FullReplicaID{RangeID: 123, ReplicaID: 3},
58+
hs: raftpb.HardState{Term: 5, Commit: 14},
59+
ts: kvserverpb.RaftTruncatedState{Index: 10, Term: 5},
60+
keys: roachpb.RSpan{Key: []byte("a"), EndKey: []byte("z")},
61+
last: 15,
62+
applied: 12,
63+
}
64+
mutate("raft", func(rw storage.ReadWriter) {
65+
r.createRaftState(ctx, t, rw)
66+
})
67+
mutate("state", func(rw storage.ReadWriter) {
68+
r.createStateMachine(ctx, t, rw)
69+
})
70+
mutate("destroy", func(rw storage.ReadWriter) {
71+
require.NoError(t, DestroyReplica(ctx, r.id, rw, rw, r.id.ReplicaID+1, ClearRangeDataOptions{
72+
ClearUnreplicatedByRangeID: true,
73+
ClearReplicatedByRangeID: true,
74+
ClearReplicatedBySpan: r.keys,
75+
}))
76+
})
77+
78+
str := strings.ReplaceAll(sb.String(), "\n\n", "\n")
79+
echotest.Require(t, str, filepath.Join(datapathutils.TestDataPath(t), t.Name()+".txt"))
80+
}
81+
82+
// replicaInfo contains the basic info about the replica, used for generating
83+
// its storage counterpart.
84+
//
85+
// TODO(pav-kv): make it reusable for other tests.
86+
type replicaInfo struct {
87+
id roachpb.FullReplicaID
88+
hs raftpb.HardState
89+
ts kvserverpb.RaftTruncatedState
90+
keys roachpb.RSpan
91+
last kvpb.RaftIndex
92+
applied kvpb.RaftIndex
93+
}
94+
95+
func (r *replicaInfo) createRaftState(ctx context.Context, t *testing.T, w storage.Writer) {
96+
sl := logstore.NewStateLoader(r.id.RangeID)
97+
require.NoError(t, sl.SetHardState(ctx, w, r.hs))
98+
require.NoError(t, sl.SetRaftTruncatedState(ctx, w, &r.ts))
99+
for i := r.ts.Index + 1; i <= r.last; i++ {
100+
require.NoError(t, storage.MVCCBlindPutProto(
101+
ctx, w,
102+
sl.RaftLogKey(i), hlc.Timestamp{}, /* timestamp */
103+
&raftpb.Entry{Index: uint64(i), Term: 5},
104+
storage.MVCCWriteOptions{},
105+
))
106+
}
107+
}
108+
109+
func (r *replicaInfo) createStateMachine(ctx context.Context, t *testing.T, rw storage.ReadWriter) {
110+
sl := stateloader.Make(r.id.RangeID)
111+
require.NoError(t, sl.SetRangeTombstone(ctx, rw, kvserverpb.RangeTombstone{
112+
NextReplicaID: r.id.ReplicaID,
113+
}))
114+
require.NoError(t, sl.SetRaftReplicaID(ctx, rw, r.id.ReplicaID))
115+
// TODO(pav-kv): figure out whether LastReplicaGCTimestamp should be in the
116+
// log or state engine.
117+
require.NoError(t, storage.MVCCBlindPutProto(
118+
ctx, rw,
119+
keys.RangeLastReplicaGCTimestampKey(r.id.RangeID),
120+
hlc.Timestamp{}, /* timestamp */
121+
&hlc.Timestamp{WallTime: 12345678},
122+
storage.MVCCWriteOptions{},
123+
))
124+
createRangeData(t, rw, r.keys)
125+
}
126+
127+
func createRangeData(t *testing.T, rw storage.ReadWriter, span roachpb.RSpan) {
128+
ts := hlc.Timestamp{WallTime: 1}
129+
for _, k := range []roachpb.Key{
130+
keys.RangeDescriptorKey(span.Key), // system
131+
span.Key.AsRawKey(), // user
132+
roachpb.Key(span.EndKey).Prevish(2), // user
133+
} {
134+
// Put something under the system or user key.
135+
require.NoError(t, rw.PutMVCC(
136+
storage.MVCCKey{Key: k, Timestamp: ts}, storage.MVCCValue{},
137+
))
138+
// Put something under the corresponding lock key.
139+
ek, _ := storage.LockTableKey{
140+
Key: k, Strength: lock.Intent, TxnUUID: uuid.UUID{},
141+
}.ToEngineKey(nil)
142+
require.NoError(t, rw.PutEngineKey(ek, nil))
143+
}
144+
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)