Skip to content

Commit b91c919

Browse files
craig[bot]fqazinvb
committed
107355: cli: enhance diagnosing contention with redacted debug zips r=fqazi a=fqazi Previously a redacted zip, we would exclude retries and key information for contended keys since they could contain PII data. This patch does the following: - Adds a new builtin is_system_table_key which allows us to know if a key belongs to a system table - Modified redacted debug zips to include data for system keys in contention tables conditionally (if they belong to system tables) - Include retries and last_retry_reason information for queries in cluster insights to help diagnose contention Fixes: cockroachdb#104593 108454: Revert "kvprober: metamorphically enable / configure kvprober" r=andrewbaptist,joshimhoff a=nvanbenschoten This reverts (most of) commit 769ba1c. That commit metamorphically enabled kvprober. This has been observed to be destabliziing to unit tests. When the metamorphic constant is enabled (50% of the time) and when kvprober is fast enough, random ranges will see extra requests that they aren’t expecting. This adds nondeterminism which can trip up tests in any number of different ways. All of the following flakes have been tracked back to kvprober: Fixes cockroachdb#107864. Fixes cockroachdb#108242. Fixes cockroachdb#108441. Fixes cockroachdb#108349. Fixes cockroachdb#108124. Closes cockroachdb#108366. Release note: None Co-authored-by: Faizan Qazi <[email protected]> Co-authored-by: Nathan VanBenschoten <[email protected]>
3 parents 99d248b + 2d82473 + 1374908 commit b91c919

File tree

8 files changed

+64
-10
lines changed

8 files changed

+64
-10
lines changed

pkg/cli/zip_table_registry.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,12 @@ func (r DebugZipTableRegistry) GetTables() []string {
107107
var zipInternalTablesPerCluster = DebugZipTableRegistry{
108108
"crdb_internal.cluster_contention_events": {
109109
// `key` column contains the contended key, which may contain sensitive
110-
// row-level data.
110+
// row-level data. So, we will only fetch if the table is under the system
111+
// schema.
111112
nonSensitiveCols: NonSensitiveColumns{
112113
"table_id",
113114
"index_id",
115+
"IF(crdb_internal.is_system_table_key(key), crdb_internal.pretty_key(key, 0) ,'redacted') as pretty_key",
114116
"num_contention_events",
115117
"cumulative_contention_time",
116118
"txn_id",
@@ -158,6 +160,8 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{
158160
"exec_node_ids",
159161
"contention",
160162
"index_recommendations",
163+
"retries",
164+
"last_retry_reason",
161165
},
162166
},
163167
"crdb_internal.cluster_locks": {
@@ -531,14 +535,16 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{
531535
},
532536
"crdb_internal.transaction_contention_events": {
533537
// `contending_key` column contains the contended key, which may
534-
// contain sensitive row-level data.
538+
// contain sensitive row-level data. So, we will only fetch if the
539+
// table is under the system schema.
535540
nonSensitiveCols: NonSensitiveColumns{
536541
"collection_ts",
537542
"blocking_txn_id",
538543
"blocking_txn_fingerprint_id",
539544
"waiting_txn_id",
540545
"waiting_txn_fingerprint_id",
541546
"contention_duration",
547+
"IF(crdb_internal.is_system_table_key(contending_key), crdb_internal.pretty_key(contending_key, 0) ,'redacted') as contending_pretty_key",
542548
},
543549
},
544550
"crdb_internal.zones": {

pkg/kv/kvprober/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ go_library(
1717
"//pkg/roachpb",
1818
"//pkg/settings",
1919
"//pkg/settings/cluster",
20-
"//pkg/util",
2120
"//pkg/util/log",
2221
"//pkg/util/log/logcrash",
2322
"//pkg/util/metric",

pkg/kv/kvprober/settings.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,9 @@ import (
1414
"time"
1515

1616
"github.com/cockroachdb/cockroach/pkg/settings"
17-
"github.com/cockroachdb/cockroach/pkg/util"
1817
"github.com/cockroachdb/errors"
1918
)
2019

21-
var defaultEnabled = util.ConstantWithMetamorphicTestBool("kv.prober.*.enabled", false)
22-
2320
// kv.prober.bypass_admission_control controls whether kvprober's requests
2421
// should bypass kv layer's admission control. Setting this value to true
2522
// ensures that kvprober will not be significantly affected if the cluster is
@@ -30,13 +27,14 @@ var bypassAdmissionControl = settings.RegisterBoolSetting(
3027
"set to bypass admission control queue for kvprober requests; "+
3128
"note that dedicated clusters should have this set as users own capacity planning "+
3229
"but serverless clusters should not have this set as SREs own capacity planning",
33-
util.ConstantWithMetamorphicTestBool("kv.prober.bypass_admission_control.enabled", true))
30+
true,
31+
)
3432

3533
var readEnabled = settings.RegisterBoolSetting(
3634
settings.TenantWritable,
3735
"kv.prober.read.enabled",
3836
"whether the KV read prober is enabled",
39-
defaultEnabled)
37+
false)
4038

4139
// TODO(josh): Another option is for the cluster setting to be a QPS target
4240
// for the cluster as a whole.
@@ -72,7 +70,7 @@ var writeEnabled = settings.RegisterBoolSetting(
7270
settings.TenantWritable,
7371
"kv.prober.write.enabled",
7472
"whether the KV write prober is enabled",
75-
defaultEnabled)
73+
false)
7674

7775
var writeInterval = settings.RegisterDurationSetting(
7876
settings.TenantWritable,
@@ -150,7 +148,7 @@ var quarantineWriteEnabled = settings.RegisterBoolSetting(
150148
"quarantine pool holds a separate group of ranges that have previously failed "+
151149
"a probe which are continually probed. This helps determine outages for ranges "+
152150
" with a high level of confidence",
153-
defaultEnabled)
151+
false)
154152

155153
var quarantineWriteInterval = settings.RegisterDurationSetting(
156154
settings.TenantWritable,

pkg/sql/faketreeeval/evalctx.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,11 @@ func (ep *DummyPrivilegedAccessor) LookupZoneConfigByNamespaceID(
535535
return "", false, errors.WithStack(errEvalPrivileged)
536536
}
537537

538+
// IsSystemTable is part of the tree.PrivilegedAccessor interface.
539+
func (ep *DummyPrivilegedAccessor) IsSystemTable(ctx context.Context, id int64) (bool, error) {
540+
return false, errors.WithStack(errEvalPrivileged)
541+
}
542+
538543
// DummySessionAccessor implements the eval.SessionAccessor interface by returning errors.
539544
type DummySessionAccessor struct{}
540545

pkg/sql/privileged_accessor.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
2020
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
2121
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
22+
"github.com/cockroachdb/cockroach/pkg/sql/sem/catid"
2223
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
2324
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
2425
"github.com/cockroachdb/errors"
@@ -76,6 +77,15 @@ func (p *planner) LookupZoneConfigByNamespaceID(
7677
return tree.DBytes(zc.GetRawBytesInStorage()), true, nil
7778
}
7879

80+
// IsSystemTable implements tree.PrivilegedAccessor.
81+
func (p *planner) IsSystemTable(ctx context.Context, id int64) (bool, error) {
82+
tbl, err := p.Descriptors().ByID(p.Txn()).Get().Table(ctx, catid.DescID(id))
83+
if err != nil {
84+
return false, err
85+
}
86+
return catalog.IsSystemDescriptor(tbl), nil
87+
}
88+
7989
// checkDescriptorPermissions returns nil if the executing user has permissions
8090
// to check the permissions of a descriptor given its ID, or the id given
8191
// is not a descriptor of a table or database.

pkg/sql/sem/builtins/builtins.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5728,6 +5728,38 @@ SELECT
57285728
Volatility: volatility.Immutable,
57295729
},
57305730
),
5731+
// Return if a key belongs to a system table, which should make it to print
5732+
// within redacted output.
5733+
"crdb_internal.is_system_table_key": makeBuiltin(
5734+
tree.FunctionProperties{
5735+
Category: builtinconstants.CategorySystemInfo,
5736+
Undocumented: true,
5737+
},
5738+
tree.Overload{
5739+
Types: tree.ParamTypes{
5740+
{Name: "raw_key", Typ: types.Bytes},
5741+
},
5742+
ReturnType: tree.FixedReturnType(types.Bool),
5743+
Fn: func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (tree.Datum, error) {
5744+
_, tableID, err := evalCtx.Codec.DecodeTablePrefix(roachpb.Key(tree.MustBeDBytes(args[0])))
5745+
if err != nil {
5746+
// If a key isn't prefixed with a table ID ignore.
5747+
//nolint:returnerrcheck
5748+
return tree.DBoolFalse, nil
5749+
}
5750+
isSystemTable, err := evalCtx.PrivilegedAccessor.IsSystemTable(ctx, int64(tableID))
5751+
if err != nil {
5752+
// If we can't find the descriptor or its not the right type then its
5753+
// not a system table.
5754+
//nolint:returnerrcheck
5755+
return tree.DBoolFalse, nil
5756+
}
5757+
return tree.MakeDBool(tree.DBool(isSystemTable)), nil
5758+
},
5759+
Info: "This function is used only by CockroachDB's developers for testing purposes.",
5760+
Volatility: volatility.Stable,
5761+
},
5762+
),
57315763

57325764
// Return a pretty string for a given span, skipping the specified number of
57335765
// fields.

pkg/sql/sem/builtins/fixed_oids.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,6 +2439,7 @@ var builtinOidsArray = []string{
24392439
2466: `crdb_internal.setup_span_configs_stream(tenant_name: string) -> bytes`,
24402440
2467: `crdb_internal.request_statement_bundle(stmtFingerprint: string, planGist: string, samplingProbability: float, minExecutionLatency: interval, expiresAfter: interval) -> bool`,
24412441
2468: `crdb_internal.request_statement_bundle(stmtFingerprint: string, planGist: string, antiPlanGist: bool, samplingProbability: float, minExecutionLatency: interval, expiresAfter: interval) -> bool`,
2442+
2469: `crdb_internal.is_system_table_key(raw_key: bytes) -> bool`,
24422443
}
24432444

24442445
var builtinOidsBySignature map[string]oid.Oid

pkg/sql/sem/eval/deps.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,9 @@ type PrivilegedAccessor interface {
534534
// Returns the config byte array, a bool representing whether the namespace exists,
535535
// and an error if there is one.
536536
LookupZoneConfigByNamespaceID(ctx context.Context, id int64) (tree.DBytes, bool, error)
537+
538+
// IsSystemTable returns if a given descriptor ID is a system table.s
539+
IsSystemTable(ctx context.Context, id int64) (bool, error)
537540
}
538541

539542
// RegionOperator gives access to the current region, validation for all

0 commit comments

Comments
 (0)