Skip to content

Commit 1654ef0

Browse files
committed
sql, tracing: add cluster setting to omit internal txns from tracing
A cluster setting has been added, `sql.trace.txn.include_internal.enabled` which is enabled by default, to include internal transactions in the probabilistic transaction tracing and latency capture. Typically we want this off, but it's `true` by default since that's always been the behavior until now. It's recommended that this be disabled when debugging a customer workload. Epic: None Release note (obs change): when enabling probabilistic transaction tracing via the `sql.trace.txn.enable_threshold` and `sql.trace.txn.sample_rate` cluster settings, it's typical that you would want to omit internal transactions from being considered for tracing and logging. You can now do this by setting `sql.trace.txn.include_internal.enabled` to false.
1 parent d2ba9a9 commit 1654ef0

File tree

6 files changed

+75
-6
lines changed

6 files changed

+75
-6
lines changed

docs/generated/settings/settings-for-tenants.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ sql.temp_object_cleaner.wait_interval duration 30m0s how long after creation a t
390390
sql.log.all_statements.enabled (alias: sql.trace.log_statement_execute) boolean false set to true to enable logging of all executed statements application
391391
sql.trace.stmt.enable_threshold duration 0s enables tracing on all statements; statements executing for longer than this duration will have their trace logged (set to 0 to disable); note that enabling this may have a negative performance impact; this setting applies to individual statements within a transaction and is therefore finer-grained than sql.trace.txn.enable_threshold application
392392
sql.trace.txn.enable_threshold duration 0s enables transaction traces for transactions exceeding this duration, used with `sql.trace.txn.sample_rate` application
393+
sql.trace.txn.include_internal.enabled boolean true enables tracing internal transactions as well as external workload using sample rate and threshold settings application
393394
sql.trace.txn.jaeger_json_output.enabled boolean false enables Jaeger JSON output for transaction traces in logs application
394395
sql.trace.txn.sample_rate float 1 enables probabilistic transaction tracing. It should be used in conjunction with `sql.trace.txn.enable_threshold`. A percentage of transactions between 0 and 1.0 will have tracing enabled, and only those which exceed the configured threshold will be logged. application
395396
sql.ttl.changefeed_replication.disabled boolean false if true, deletes issued by TTL will not be replicated via changefeeds (this setting will be ignored by changefeeds that have the ignore_disable_changefeed_replication option set; such changefeeds will continue to replicate all TTL deletes) application

docs/generated/settings/settings.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@
345345
<tr><td><div id="setting-sql-trace-log-statement-execute" class="anchored"><code>sql.log.all_statements.enabled<br />(alias: sql.trace.log_statement_execute)</code></div></td><td>boolean</td><td><code>false</code></td><td>set to true to enable logging of all executed statements</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
346346
<tr><td><div id="setting-sql-trace-stmt-enable-threshold" class="anchored"><code>sql.trace.stmt.enable_threshold</code></div></td><td>duration</td><td><code>0s</code></td><td>enables tracing on all statements; statements executing for longer than this duration will have their trace logged (set to 0 to disable); note that enabling this may have a negative performance impact; this setting applies to individual statements within a transaction and is therefore finer-grained than sql.trace.txn.enable_threshold</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
347347
<tr><td><div id="setting-sql-trace-txn-enable-threshold" class="anchored"><code>sql.trace.txn.enable_threshold</code></div></td><td>duration</td><td><code>0s</code></td><td>enables transaction traces for transactions exceeding this duration, used with `sql.trace.txn.sample_rate`</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
348+
<tr><td><div id="setting-sql-trace-txn-include-internal-enabled" class="anchored"><code>sql.trace.txn.include_internal.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>enables tracing internal transactions as well as external workload using sample rate and threshold settings</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
348349
<tr><td><div id="setting-sql-trace-txn-jaeger-json-output-enabled" class="anchored"><code>sql.trace.txn.jaeger_json_output.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>enables Jaeger JSON output for transaction traces in logs</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
349350
<tr><td><div id="setting-sql-trace-txn-sample-rate" class="anchored"><code>sql.trace.txn.sample_rate</code></div></td><td>float</td><td><code>1</code></td><td>enables probabilistic transaction tracing. It should be used in conjunction with `sql.trace.txn.enable_threshold`. A percentage of transactions between 0 and 1.0 will have tracing enabled, and only those which exceed the configured threshold will be logged.</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
350351
<tr><td><div id="setting-sql-ttl-changefeed-replication-disabled" class="anchored"><code>sql.ttl.changefeed_replication.disabled</code></div></td><td>boolean</td><td><code>false</code></td><td>if true, deletes issued by TTL will not be replicated via changefeeds (this setting will be ignored by changefeeds that have the ignore_disable_changefeed_replication option set; such changefeeds will continue to replicate all TTL deletes)</td><td>Serverless/Dedicated/Self-Hosted</td></tr>

pkg/sql/conn_executor.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,7 @@ func (s *Server) newConnExecutor(
11631163
mon: txnMon,
11641164
connCtx: ctx,
11651165
testingForceRealTracingSpans: s.cfg.TestingKnobs.ForceRealTracingSpans,
1166+
execType: executorType,
11661167
},
11671168
transitionCtx: transitionCtx{
11681169
db: s.cfg.DB,

pkg/sql/exec_util.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,16 @@ var TraceTxnOutputJaegerJSON = settings.RegisterBoolSetting(
291291
false,
292292
settings.WithPublic)
293293

294+
// TraceTxnIncludeInternal when enabled will also subject internal queries to trace capture.
295+
var TraceTxnIncludeInternal = settings.RegisterBoolSetting(
296+
settings.ApplicationLevel,
297+
"sql.trace.txn.include_internal.enabled",
298+
"enables tracing internal transactions as well as external workload using "+
299+
"sample rate and threshold settings",
300+
true,
301+
settings.WithPublic,
302+
)
303+
294304
// TraceStmtThreshold is identical to traceTxnThreshold except it applies to
295305
// individual statements in a transaction. The motivation for this setting is
296306
// to be able to reduce the noise associated with a larger transaction (e.g.

pkg/sql/trace_test.go

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
2222
"github.com/cockroachdb/cockroach/pkg/sql"
2323
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
24+
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
2425
"github.com/cockroachdb/cockroach/pkg/testutils"
2526
"github.com/cockroachdb/cockroach/pkg/testutils/pgurlutils"
2627
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
@@ -615,6 +616,8 @@ func TestTraceTxnSampleRateAndThreshold(t *testing.T) {
615616
sampleRate float64
616617
threshold time.Duration
617618
exptToTraceEventually bool
619+
checkJaegerOutput bool
620+
checkExcludeInternal bool
618621
}{
619622
{
620623
name: "no sample rate and no threshold",
@@ -646,6 +649,20 @@ func TestTraceTxnSampleRateAndThreshold(t *testing.T) {
646649
threshold: 1 * time.Nanosecond,
647650
exptToTraceEventually: true,
648651
},
652+
{
653+
name: "jaeger output with sample rate 1.0 and threshold 1ns should trace",
654+
sampleRate: 1.0,
655+
threshold: 1 * time.Nanosecond,
656+
exptToTraceEventually: true,
657+
checkJaegerOutput: true,
658+
},
659+
{
660+
name: "internal queries omitted with cluster setting",
661+
sampleRate: 1.0,
662+
threshold: 1 * time.Nanosecond,
663+
exptToTraceEventually: true,
664+
checkExcludeInternal: true,
665+
},
649666
} {
650667
t.Run(tc.name, func(t *testing.T) {
651668
sql.TraceTxnThreshold.Override(ctx, &settings.SV, tc.threshold)
@@ -655,14 +672,46 @@ func TestTraceTxnSampleRateAndThreshold(t *testing.T) {
655672
r := sqlutils.MakeSQLRunner(db)
656673

657674
if tc.exptToTraceEventually {
658-
testutils.SucceedsSoon(t, func() error {
659-
r.Exec(t, "SELECT pg_sleep(0.01)")
675+
if tc.checkJaegerOutput {
676+
sql.TraceTxnOutputJaegerJSON.Override(ctx, &settings.SV, true)
677+
testutils.SucceedsSoon(t, func() error {
678+
r.Exec(t, "SELECT pg_sleep(0.01)")
679+
log.FlushAllSync()
680+
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
681+
return errors.New("no sql txn log found (tracing did not happen)")
682+
}
683+
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("{\"refType\":\"CHILD_OF\",\"traceID\":\""))) {
684+
return errors.New("no Jaeger JSON found")
685+
}
686+
return nil
687+
})
688+
} else if tc.checkExcludeInternal {
689+
sql.TraceTxnIncludeInternal.Override(ctx, &settings.SV, false)
660690
log.FlushAllSync()
661-
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
662-
return errors.New("no sql txn log found (tracing did not happen)")
691+
appLogsSpy.Reset() // Clear logs after setting the cluster setting
692+
693+
// Use the internal executor directly to create actual internal transactions
694+
ie := s.InternalExecutor().(*sql.InternalExecutor)
695+
_, err := ie.ExecEx(ctx, "test-internal-query", nil, /* txn */
696+
sessiondata.NodeUserSessionDataOverride,
697+
"SELECT pg_sleep(0.01)")
698+
if err != nil {
699+
t.Fatal(err)
663700
}
664-
return nil
665-
})
701+
log.FlushAllSync()
702+
if appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
703+
t.Fatal("internal sql txn log found when internal transactions should be excluded from tracing")
704+
}
705+
} else {
706+
testutils.SucceedsSoon(t, func() error {
707+
r.Exec(t, "SELECT pg_sleep(0.01)")
708+
log.FlushAllSync()
709+
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
710+
return errors.New("no sql txn log found (tracing did not happen)")
711+
}
712+
return nil
713+
})
714+
}
666715
} else {
667716
r.Exec(t, "SELECT pg_sleep(0.01)")
668717
log.FlushAllSync()

pkg/sql/txn_state.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ type txnState struct {
142142
// testingForceRealTracingSpans is a test-only knob that forces the use of
143143
// real (i.e. not no-op) tracing spans for every statement.
144144
testingForceRealTracingSpans bool
145+
146+
// execType records the executor type for the transaction.
147+
execType executorType
145148
}
146149

147150
// txnType represents the type of a SQL transaction.
@@ -218,7 +221,11 @@ func (ts *txnState) resetForNewSQLTxn(
218221
duration := TraceTxnThreshold.Get(&tranCtx.settings.SV)
219222

220223
sampleRate := TraceTxnSampleRate.Get(&tranCtx.settings.SV)
224+
includeInternal := TraceTxnIncludeInternal.Get(&tranCtx.settings.SV)
221225
ts.shouldRecord = sampleRate > 0 && duration > 0 && rng.Float64() < sampleRate
226+
if !includeInternal && ts.execType == executorTypeInternal {
227+
ts.shouldRecord = false
228+
}
222229
ts.outputJaegerJSON = TraceTxnOutputJaegerJSON.Get(&tranCtx.settings.SV)
223230

224231
if alreadyRecording || ts.shouldRecord {

0 commit comments

Comments
 (0)