Skip to content

Commit 3824cc2

Browse files
craig[bot]dhartunian
andcommitted
Merge #151433
151433: sql, tracing: add cluster setting to omit internal txns from tracing r=dhartunian a=dhartunian A cluster setting has been added, `sql.trace.txn.include_internal.enabled` which is enabled by default, to include internal transactions in the probabilistic transaction tracing and latency capture. Typically we want this off, but it's `true` by default since that's always been the behavior until now. It's recommended that this be disabled when debugging a customer workload. Epic: None Release note (obs change): when enabling probabilistic transaction tracing via the `sql.trace.txn.enable_threshold` and `sql.trace.txn.sample_rate` cluster settings, it's typical that you would want to omit internal transactions from being considered for tracing and logging. You can now do this by setting `sql.trace.txn.include_internal.enabled` to false. ---- Prior commit is being reviewed here: #151414 Also tests are not correct yet, I'm trying to use this patch to consolidate tests from the last one to prevent lots of small tests that have to start their own servers. Co-authored-by: David Hartunian <[email protected]>
2 parents d2ba9a9 + 1654ef0 commit 3824cc2

File tree

6 files changed

+75
-6
lines changed

6 files changed

+75
-6
lines changed

docs/generated/settings/settings-for-tenants.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ sql.temp_object_cleaner.wait_interval duration 30m0s how long after creation a t
390390
sql.log.all_statements.enabled (alias: sql.trace.log_statement_execute) boolean false set to true to enable logging of all executed statements application
391391
sql.trace.stmt.enable_threshold duration 0s enables tracing on all statements; statements executing for longer than this duration will have their trace logged (set to 0 to disable); note that enabling this may have a negative performance impact; this setting applies to individual statements within a transaction and is therefore finer-grained than sql.trace.txn.enable_threshold application
392392
sql.trace.txn.enable_threshold duration 0s enables transaction traces for transactions exceeding this duration, used with `sql.trace.txn.sample_rate` application
393+
sql.trace.txn.include_internal.enabled boolean true enables tracing internal transactions as well as external workload using sample rate and threshold settings application
393394
sql.trace.txn.jaeger_json_output.enabled boolean false enables Jaeger JSON output for transaction traces in logs application
394395
sql.trace.txn.sample_rate float 1 enables probabilistic transaction tracing. It should be used in conjunction with `sql.trace.txn.enable_threshold`. A percentage of transactions between 0 and 1.0 will have tracing enabled, and only those which exceed the configured threshold will be logged. application
395396
sql.ttl.changefeed_replication.disabled boolean false if true, deletes issued by TTL will not be replicated via changefeeds (this setting will be ignored by changefeeds that have the ignore_disable_changefeed_replication option set; such changefeeds will continue to replicate all TTL deletes) application

docs/generated/settings/settings.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@
345345
<tr><td><div id="setting-sql-trace-log-statement-execute" class="anchored"><code>sql.log.all_statements.enabled<br />(alias: sql.trace.log_statement_execute)</code></div></td><td>boolean</td><td><code>false</code></td><td>set to true to enable logging of all executed statements</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
346346
<tr><td><div id="setting-sql-trace-stmt-enable-threshold" class="anchored"><code>sql.trace.stmt.enable_threshold</code></div></td><td>duration</td><td><code>0s</code></td><td>enables tracing on all statements; statements executing for longer than this duration will have their trace logged (set to 0 to disable); note that enabling this may have a negative performance impact; this setting applies to individual statements within a transaction and is therefore finer-grained than sql.trace.txn.enable_threshold</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
347347
<tr><td><div id="setting-sql-trace-txn-enable-threshold" class="anchored"><code>sql.trace.txn.enable_threshold</code></div></td><td>duration</td><td><code>0s</code></td><td>enables transaction traces for transactions exceeding this duration, used with `sql.trace.txn.sample_rate`</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
348+
<tr><td><div id="setting-sql-trace-txn-include-internal-enabled" class="anchored"><code>sql.trace.txn.include_internal.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>enables tracing internal transactions as well as external workload using sample rate and threshold settings</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
348349
<tr><td><div id="setting-sql-trace-txn-jaeger-json-output-enabled" class="anchored"><code>sql.trace.txn.jaeger_json_output.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>enables Jaeger JSON output for transaction traces in logs</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
349350
<tr><td><div id="setting-sql-trace-txn-sample-rate" class="anchored"><code>sql.trace.txn.sample_rate</code></div></td><td>float</td><td><code>1</code></td><td>enables probabilistic transaction tracing. It should be used in conjunction with `sql.trace.txn.enable_threshold`. A percentage of transactions between 0 and 1.0 will have tracing enabled, and only those which exceed the configured threshold will be logged.</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
350351
<tr><td><div id="setting-sql-ttl-changefeed-replication-disabled" class="anchored"><code>sql.ttl.changefeed_replication.disabled</code></div></td><td>boolean</td><td><code>false</code></td><td>if true, deletes issued by TTL will not be replicated via changefeeds (this setting will be ignored by changefeeds that have the ignore_disable_changefeed_replication option set; such changefeeds will continue to replicate all TTL deletes)</td><td>Serverless/Dedicated/Self-Hosted</td></tr>

pkg/sql/conn_executor.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,7 @@ func (s *Server) newConnExecutor(
11631163
mon: txnMon,
11641164
connCtx: ctx,
11651165
testingForceRealTracingSpans: s.cfg.TestingKnobs.ForceRealTracingSpans,
1166+
execType: executorType,
11661167
},
11671168
transitionCtx: transitionCtx{
11681169
db: s.cfg.DB,

pkg/sql/exec_util.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,16 @@ var TraceTxnOutputJaegerJSON = settings.RegisterBoolSetting(
291291
false,
292292
settings.WithPublic)
293293

294+
// TraceTxnIncludeInternal when enabled will also subject internal queries to trace capture.
295+
var TraceTxnIncludeInternal = settings.RegisterBoolSetting(
296+
settings.ApplicationLevel,
297+
"sql.trace.txn.include_internal.enabled",
298+
"enables tracing internal transactions as well as external workload using "+
299+
"sample rate and threshold settings",
300+
true,
301+
settings.WithPublic,
302+
)
303+
294304
// TraceStmtThreshold is identical to traceTxnThreshold except it applies to
295305
// individual statements in a transaction. The motivation for this setting is
296306
// to be able to reduce the noise associated with a larger transaction (e.g.

pkg/sql/trace_test.go

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
2222
"github.com/cockroachdb/cockroach/pkg/sql"
2323
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
24+
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
2425
"github.com/cockroachdb/cockroach/pkg/testutils"
2526
"github.com/cockroachdb/cockroach/pkg/testutils/pgurlutils"
2627
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
@@ -615,6 +616,8 @@ func TestTraceTxnSampleRateAndThreshold(t *testing.T) {
615616
sampleRate float64
616617
threshold time.Duration
617618
exptToTraceEventually bool
619+
checkJaegerOutput bool
620+
checkExcludeInternal bool
618621
}{
619622
{
620623
name: "no sample rate and no threshold",
@@ -646,6 +649,20 @@ func TestTraceTxnSampleRateAndThreshold(t *testing.T) {
646649
threshold: 1 * time.Nanosecond,
647650
exptToTraceEventually: true,
648651
},
652+
{
653+
name: "jaeger output with sample rate 1.0 and threshold 1ns should trace",
654+
sampleRate: 1.0,
655+
threshold: 1 * time.Nanosecond,
656+
exptToTraceEventually: true,
657+
checkJaegerOutput: true,
658+
},
659+
{
660+
name: "internal queries omitted with cluster setting",
661+
sampleRate: 1.0,
662+
threshold: 1 * time.Nanosecond,
663+
exptToTraceEventually: true,
664+
checkExcludeInternal: true,
665+
},
649666
} {
650667
t.Run(tc.name, func(t *testing.T) {
651668
sql.TraceTxnThreshold.Override(ctx, &settings.SV, tc.threshold)
@@ -655,14 +672,46 @@ func TestTraceTxnSampleRateAndThreshold(t *testing.T) {
655672
r := sqlutils.MakeSQLRunner(db)
656673

657674
if tc.exptToTraceEventually {
658-
testutils.SucceedsSoon(t, func() error {
659-
r.Exec(t, "SELECT pg_sleep(0.01)")
675+
if tc.checkJaegerOutput {
676+
sql.TraceTxnOutputJaegerJSON.Override(ctx, &settings.SV, true)
677+
testutils.SucceedsSoon(t, func() error {
678+
r.Exec(t, "SELECT pg_sleep(0.01)")
679+
log.FlushAllSync()
680+
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
681+
return errors.New("no sql txn log found (tracing did not happen)")
682+
}
683+
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("{\"refType\":\"CHILD_OF\",\"traceID\":\""))) {
684+
return errors.New("no Jaeger JSON found")
685+
}
686+
return nil
687+
})
688+
} else if tc.checkExcludeInternal {
689+
sql.TraceTxnIncludeInternal.Override(ctx, &settings.SV, false)
660690
log.FlushAllSync()
661-
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
662-
return errors.New("no sql txn log found (tracing did not happen)")
691+
appLogsSpy.Reset() // Clear logs after setting the cluster setting
692+
693+
// Use the internal executor directly to create actual internal transactions
694+
ie := s.InternalExecutor().(*sql.InternalExecutor)
695+
_, err := ie.ExecEx(ctx, "test-internal-query", nil, /* txn */
696+
sessiondata.NodeUserSessionDataOverride,
697+
"SELECT pg_sleep(0.01)")
698+
if err != nil {
699+
t.Fatal(err)
663700
}
664-
return nil
665-
})
701+
log.FlushAllSync()
702+
if appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
703+
t.Fatal("internal sql txn log found when internal transactions should be excluded from tracing")
704+
}
705+
} else {
706+
testutils.SucceedsSoon(t, func() error {
707+
r.Exec(t, "SELECT pg_sleep(0.01)")
708+
log.FlushAllSync()
709+
if !appLogsSpy.Has(logtestutils.MatchesF(regexp.QuoteMeta("ExecStmt: SELECT pg_sleep(0.01)"))) {
710+
return errors.New("no sql txn log found (tracing did not happen)")
711+
}
712+
return nil
713+
})
714+
}
666715
} else {
667716
r.Exec(t, "SELECT pg_sleep(0.01)")
668717
log.FlushAllSync()

pkg/sql/txn_state.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ type txnState struct {
142142
// testingForceRealTracingSpans is a test-only knob that forces the use of
143143
// real (i.e. not no-op) tracing spans for every statement.
144144
testingForceRealTracingSpans bool
145+
146+
// execType records the executor type for the transaction.
147+
execType executorType
145148
}
146149

147150
// txnType represents the type of a SQL transaction.
@@ -218,7 +221,11 @@ func (ts *txnState) resetForNewSQLTxn(
218221
duration := TraceTxnThreshold.Get(&tranCtx.settings.SV)
219222

220223
sampleRate := TraceTxnSampleRate.Get(&tranCtx.settings.SV)
224+
includeInternal := TraceTxnIncludeInternal.Get(&tranCtx.settings.SV)
221225
ts.shouldRecord = sampleRate > 0 && duration > 0 && rng.Float64() < sampleRate
226+
if !includeInternal && ts.execType == executorTypeInternal {
227+
ts.shouldRecord = false
228+
}
222229
ts.outputJaegerJSON = TraceTxnOutputJaegerJSON.Get(&tranCtx.settings.SV)
223230

224231
if alreadyRecording || ts.shouldRecord {

0 commit comments

Comments
 (0)