Commit c1af51d
sql: harden recent fix to top-level query stats with routines
This commit relaxes the requirements for when it is safe to set the routine metadata forwarder. Previously, we only set it when we ended up using the RootTxn in a local plan; now we examine all possible kinds of concurrency within the local plan to determine whether it is actually safe to set the metadata forwarder. This also makes for a nice cleanup. Note that this commit independently fixes the issue mentioned in the previous commit.

Release note: None
1 parent 63eebf3 commit c1af51d
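
The core of the change is the diff to pkg/sql/distsql/server.go below: the two booleans HasConcurrency and MustUseLeaf on LocalState are replaced by a single bit-set of concurrency kinds. The following is a minimal standalone sketch of that pattern only, under the assumption that a simplified stand-in is enough to show the semantics; localState here is an invented name, not the actual distsql.LocalState.

package main

import "fmt"

// ConcurrencyKind mirrors the bit-flag type added in pkg/sql/distsql/server.go.
type ConcurrencyKind uint32

const (
	ConcurrencyHasParallelProcessors ConcurrencyKind = 1 << iota
	ConcurrencyStreamer
	ConcurrencyParallelChecks
	ConcurrencyWithOuterPlan
)

// localState is a cut-down, illustrative stand-in for distsql.LocalState.
type localState struct {
	isLocal     bool
	concurrency ConcurrencyKind
}

// AddConcurrency accumulates kinds via bitwise OR; adding a zero mask is a no-op.
func (l *localState) AddConcurrency(kind ConcurrencyKind) {
	l.concurrency |= kind
}

// MustUseLeafTxn: a distributed plan or any recorded concurrency forces the LeafTxn.
func (l *localState) MustUseLeafTxn() bool {
	return !l.isLocal || l.concurrency != 0
}

func main() {
	l := &localState{isLocal: true}
	fmt.Println(l.MustUseLeafTxn()) // false: fully local plan, no concurrency recorded

	l.AddConcurrency(ConcurrencyStreamer)
	l.AddConcurrency(ConcurrencyWithOuterPlan)
	fmt.Println(l.MustUseLeafTxn()) // true: some concurrency is present
}

Because a zero mask is a no-op, callers can pass a possibly-zero planning-time value (as DistSQLPlanner.Run does with planCtx.flowConcurrency) without any conditional.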

File tree

pkg/sql/apply_join.go
pkg/sql/distsql/server.go
pkg/sql/distsql_physical_planner.go
pkg/sql/distsql_running.go
pkg/sql/planner.go
pkg/sql/schema_changer.go

6 files changed: 99 additions & 31 deletions

pkg/sql/apply_join.go
Lines changed: 5 additions & 2 deletions

@@ -11,6 +11,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
+	"github.com/cockroachdb/cockroach/pkg/sql/distsql"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
@@ -321,7 +322,7 @@ func runPlanInsidePlan(
 		recv,
 		&subqueryResultMemAcc,
 		false, /* skipDistSQLDiagramGeneration */
-		params.p.mustUseLeafTxn(),
+		params.p.innerPlansMustUseLeafTxn(),
 	) {
 		return recv.stats, resultWriter.Err()
 	}
@@ -336,7 +337,9 @@
 	planCtx := execCfg.DistSQLPlanner.NewPlanningCtx(ctx, evalCtx, &plannerCopy, plannerCopy.txn, distributeType)
 	planCtx.distSQLProhibitedErr = distSQLProhibitedErr
 	planCtx.stmtType = recv.stmtType
-	planCtx.mustUseLeafTxn = params.p.mustUseLeafTxn()
+	if params.p.innerPlansMustUseLeafTxn() {
+		planCtx.flowConcurrency = distsql.ConcurrencyWithOuterPlan
+	}
 	planCtx.stmtForDistSQLDiagram = stmtForDistSQLDiagram

 	// Wrap PlanAndRun in a function call so that we clean up immediately.

pkg/sql/distsql/server.go
Lines changed: 39 additions & 9 deletions

@@ -545,6 +545,28 @@ func newFlow(
 	return rowflow.NewRowBasedFlow(base)
 }

+// ConcurrencyKind indicates which concurrency type is present within the local
+// DistSQL flow. Note that inter-node concurrency (i.e. whether we have a
+// distributed plan) is not reflected here.
+type ConcurrencyKind uint32
+
+const (
+	// ConcurrencyHasParallelProcessors, if set, indicates that we have multiple
+	// processors running for the same plan stage.
+	ConcurrencyHasParallelProcessors ConcurrencyKind = (1 << iota)
+	// ConcurrencyStreamer, if set, indicates we have concurrency due to usage
+	// of the Streamer API.
+	ConcurrencyStreamer
+	// ConcurrencyParallelChecks, if set, indicates that we're running
+	// post-query CHECKs in parallel with each other (i.e. the concurrency is
+	// with _other_ local flows).
+	ConcurrencyParallelChecks
+	// ConcurrencyWithOuterPlan, if set, indicates that - if we're running an
+	// "inner" plan (like an apply-join iteration or a routine) - we might have
+	// concurrency with the "outer" plan.
+	ConcurrencyWithOuterPlan
+)
+
 // LocalState carries information that is required to set up a flow with wrapped
 // planNodes.
 type LocalState struct {
@@ -559,13 +581,9 @@ type LocalState struct {
 	// remote flows.
 	IsLocal bool

-	// HasConcurrency indicates whether the local flow uses multiple goroutines.
-	HasConcurrency bool
-
-	// MustUseLeaf indicates whether the local flow must use the LeafTxn even if
-	// there is no concurrency in the flow on its own because there would be
-	// concurrency with other flows which prohibits the usage of the RootTxn.
-	MustUseLeaf bool
+	// concurrency tracks the types of concurrency present when accessing the
+	// Txn.
+	concurrency ConcurrencyKind

 	// Txn is filled in on the gateway only. It is the RootTxn that the query is running in.
 	// This will be used directly by the flow if the flow has no concurrency and IsLocal is set.
@@ -582,10 +600,22 @@ type LocalState struct {
 	LocalVectorSources map[int32]any
 }

+// AddConcurrency marks the given concurrency kinds as present in the local
+// flow.
+func (l *LocalState) AddConcurrency(kind ConcurrencyKind) {
+	l.concurrency |= kind
+}
+
+// GetConcurrency returns the bit-mask representing all concurrency kinds
+// present in the local flow.
+func (l LocalState) GetConcurrency() ConcurrencyKind {
+	return l.concurrency
+}
+
 // MustUseLeafTxn returns true if a LeafTxn must be used. It is valid to call
-// this method only after IsLocal and HasConcurrency have been set correctly.
+// this method only after IsLocal and all concurrency kinds have been set.
 func (l LocalState) MustUseLeafTxn() bool {
-	return !l.IsLocal || l.HasConcurrency || l.MustUseLeaf
+	return !l.IsLocal || l.concurrency != 0
 }

 // SetupLocalSyncFlow sets up a synchronous flow on the current (planning) node,

pkg/sql/distsql_physical_planner.go
Lines changed: 2 additions & 2 deletions

@@ -1020,11 +1020,11 @@ type PlanningCtx struct {
 	// query).
 	subOrPostQuery bool

-	// mustUseLeafTxn, if set, indicates that this PlanningCtx is used to handle
+	// flowConcurrency will be non-zero when this PlanningCtx is used to handle
 	// one of the plans that will run in parallel with other plans. As such, the
 	// DistSQL planner will need to use the LeafTxn (even if it's not needed
 	// based on other "regular" factors).
-	mustUseLeafTxn bool
+	flowConcurrency distsql.ConcurrencyKind

 	// onFlowCleanup contains non-nil functions that will be called after the
 	// local flow finished running and is being cleaned up. It allows us to

pkg/sql/distsql_running.go
Lines changed: 49 additions & 15 deletions

@@ -754,7 +754,7 @@ func (dsp *DistSQLPlanner) Run(
 	// the line.
 	localState.EvalContext = evalCtx
 	localState.IsLocal = planCtx.isLocal
-	localState.MustUseLeaf = planCtx.mustUseLeafTxn
+	localState.AddConcurrency(planCtx.flowConcurrency)
 	localState.Txn = txn
 	localState.LocalProcs = plan.LocalProcessors
 	localState.LocalVectorSources = plan.LocalVectorSources
@@ -777,15 +777,19 @@
 		// cannot create a LeafTxn, so we cannot parallelize scans.
 		planCtx.parallelizeScansIfLocal = false
 		for _, flow := range flows {
-			localState.HasConcurrency = localState.HasConcurrency || execinfra.HasParallelProcessors(flow)
+			if execinfra.HasParallelProcessors(flow) {
+				localState.AddConcurrency(distsql.ConcurrencyHasParallelProcessors)
+			}
 		}
 	} else {
 		if planCtx.isLocal && noMutations && planCtx.parallelizeScansIfLocal {
 			// Even though we have a single flow on the gateway node, we might
 			// have decided to parallelize the scans. If that's the case, we
 			// will need to use the Leaf txn.
 			for _, flow := range flows {
-				localState.HasConcurrency = localState.HasConcurrency || execinfra.HasParallelProcessors(flow)
+				if execinfra.HasParallelProcessors(flow) {
+					localState.AddConcurrency(distsql.ConcurrencyHasParallelProcessors)
+				}
 			}
 		}
 		if noMutations {
@@ -886,7 +890,7 @@
 					// Both index and lookup joins, with and without
 					// ordering, are executed via the Streamer API that has
 					// concurrency.
-					localState.HasConcurrency = true
+					localState.AddConcurrency(distsql.ConcurrencyStreamer)
 					break
 				}
 			}
@@ -1009,11 +1013,37 @@
 		return
 	}

-	if len(flows) == 1 && evalCtx.Txn != nil && evalCtx.Txn.Type() == kv.RootTxn {
-		// If we have a fully local plan and a RootTxn, we don't expect any
-		// concurrency, so it's safe to use the DistSQLReceiver to push the
-		// metadata into directly from routines.
-		if planCtx.planner != nil {
+	if len(flows) == 1 && planCtx.planner != nil {
+		// We have a fully local plan, so check whether it'll be safe to use the
+		// DistSQLReceiver to push the metadata into directly from routines
+		// (which is the case when we don't have any concurrency between
+		// routines themselves as well as a routine and the "head" processor -
+		// the one pushing into the DistSQLReceiver).
+		var safe bool
+		if evalCtx.Txn != nil && evalCtx.Txn.Type() == kv.RootTxn {
+			// We have a RootTxn, so we don't expect any concurrency whatsoever.
+			safe = true
+		} else {
+			// We have a LeafTxn, so we need to examine what kind of concurrency
+			// is present in the flow.
+			var safeConcurrency distsql.ConcurrencyKind
+			// We don't care whether we use the Streamer API - it has
+			// concurrency only at the KV client level and below.
+			safeConcurrency |= distsql.ConcurrencyStreamer
+			// If we have "outer plan" concurrency, the "inner" and the
+			// "outer" plans have their own DistSQLReceivers.
+			//
+			// Note that the same is the case with parallel CHECKs concurrency,
+			// but then planCtx.planner is shared between goroutines, so we'll
+			// avoid mutating it. (We can't have routines in post-query CHECKs
+			// since only FK and UNIQUE checks are run in parallel.)
+			safeConcurrency |= distsql.ConcurrencyWithOuterPlan
+			unsafeConcurrency := ^safeConcurrency
+			if localState.GetConcurrency()&unsafeConcurrency == 0 {
+				safe = true
+			}
+		}
+		if safe {
 			planCtx.planner.routineMetadataForwarder = recv
 		}
 	}
@@ -1989,7 +2019,7 @@ func (dsp *DistSQLPlanner) PlanAndRunAll(
 			// Skip the diagram generation since on this "main" query path we
 			// can get it via the statement bundle.
 			true, /* skipDistSQLDiagramGeneration */
-			false, /* mustUseLeafTxn */
+			false, /* innerPlansMustUseLeafTxn */
 		) {
 			return recv.commErr
 		}
@@ -2056,7 +2086,7 @@ func (dsp *DistSQLPlanner) PlanAndRunSubqueries(
 	recv *DistSQLReceiver,
 	subqueryResultMemAcc *mon.BoundAccount,
 	skipDistSQLDiagramGeneration bool,
-	mustUseLeafTxn bool,
+	innerPlansMustUseLeafTxn bool,
 ) bool {
 	for planIdx, subqueryPlan := range subqueryPlans {
 		if err := dsp.planAndRunSubquery(
@@ -2069,7 +2099,7 @@ func (dsp *DistSQLPlanner) PlanAndRunSubqueries(
 			recv,
 			subqueryResultMemAcc,
 			skipDistSQLDiagramGeneration,
-			mustUseLeafTxn,
+			innerPlansMustUseLeafTxn,
 		); err != nil {
 			recv.SetError(err)
 			return false
@@ -2093,7 +2123,7 @@ func (dsp *DistSQLPlanner) planAndRunSubquery(
 	recv *DistSQLReceiver,
 	subqueryResultMemAcc *mon.BoundAccount,
 	skipDistSQLDiagramGeneration bool,
-	mustUseLeafTxn bool,
+	innerPlansMustUseLeafTxn bool,
 ) error {
 	subqueryDistribution, distSQLProhibitedErr := planner.getPlanDistribution(ctx, subqueryPlan.plan)
 	distribute := DistributionType(LocalDistribution)
@@ -2105,7 +2135,9 @@
 	subqueryPlanCtx.stmtType = tree.Rows
 	subqueryPlanCtx.skipDistSQLDiagramGeneration = skipDistSQLDiagramGeneration
 	subqueryPlanCtx.subOrPostQuery = true
-	subqueryPlanCtx.mustUseLeafTxn = mustUseLeafTxn
+	if innerPlansMustUseLeafTxn {
+		subqueryPlanCtx.flowConcurrency = distsql.ConcurrencyWithOuterPlan
+	}
 	if planner.instrumentation.ShouldSaveFlows() {
 		subqueryPlanCtx.saveFlows = getDefaultSaveFlowsFunc(ctx, planner, planComponentTypeSubquery)
 	}
@@ -2724,7 +2756,9 @@ func (dsp *DistSQLPlanner) planAndRunPostquery(
 	}
 	postqueryPlanCtx.associateNodeWithComponents = associateNodeWithComponents
 	postqueryPlanCtx.collectExecStats = planner.instrumentation.ShouldCollectExecStats()
-	postqueryPlanCtx.mustUseLeafTxn = parallelCheck
+	if parallelCheck {
+		postqueryPlanCtx.flowConcurrency = distsql.ConcurrencyParallelChecks
+	}

 	postqueryPhysPlan, physPlanCleanup, err := dsp.createPhysPlan(ctx, postqueryPlanCtx, postqueryPlan)
 	defer physPlanCleanup()
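
To make the new check in the Run hunk above concrete, here is a hedged standalone sketch of just the masking step. The helper name safeToSetForwarder and its simplified inputs are invented for illustration; the real code additionally requires a single local flow and a non-nil planner before setting routineMetadataForwarder.

package main

import "fmt"

// ConcurrencyKind and its constants mirror pkg/sql/distsql/server.go.
type ConcurrencyKind uint32

const (
	ConcurrencyHasParallelProcessors ConcurrencyKind = 1 << iota
	ConcurrencyStreamer
	ConcurrencyParallelChecks
	ConcurrencyWithOuterPlan
)

// safeToSetForwarder reports whether pushing routine metadata directly into the
// DistSQLReceiver is safe. With a RootTxn there is no concurrency at all; with
// a LeafTxn, only Streamer and outer-plan concurrency are considered harmless,
// so any other bit blocks the forwarder.
func safeToSetForwarder(haveRootTxn bool, present ConcurrencyKind) bool {
	if haveRootTxn {
		return true
	}
	safe := ConcurrencyStreamer | ConcurrencyWithOuterPlan
	return present&^safe == 0
}

func main() {
	fmt.Println(safeToSetForwarder(false, ConcurrencyStreamer))                          // true
	fmt.Println(safeToSetForwarder(false, ConcurrencyStreamer|ConcurrencyWithOuterPlan)) // true
	fmt.Println(safeToSetForwarder(false, ConcurrencyParallelChecks))                    // false: planner is shared across goroutines
	fmt.Println(safeToSetForwarder(false, ConcurrencyHasParallelProcessors))             // false
}

Note that present&^safe == 0 is equivalent to the diff's GetConcurrency()&unsafeConcurrency == 0 with unsafeConcurrency := ^safeConcurrency.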

pkg/sql/planner.go
Lines changed: 3 additions & 2 deletions

@@ -1077,8 +1077,9 @@ func (p *planner) ClearTableStatsCache() {
 	}
 }

-// mustUseLeafTxn returns true if inner plans must use a leaf transaction.
-func (p *planner) mustUseLeafTxn() bool {
+// innerPlansMustUseLeafTxn returns true if inner plans must use a leaf
+// transaction.
+func (p *planner) innerPlansMustUseLeafTxn() bool {
 	return atomic.LoadInt32(&p.atomic.innerPlansMustUseLeafTxn) >= 1
 }

pkg/sql/schema_changer.go
Lines changed: 1 addition & 1 deletion

@@ -521,7 +521,7 @@ func (sc *SchemaChanger) backfillQueryIntoTable(
 		ctx, localPlanner, localPlanner.ExtendedEvalContextCopy,
 		localPlanner.curPlan.subqueryPlans, recv, &subqueryResultMemAcc,
 		false, /* skipDistSQLDiagramGeneration */
-		false, /* mustUseLeafTxn */
+		false, /* innerPlansMustUseLeafTxn */
 	) {
 		if planAndRunErr = rw.Err(); planAndRunErr != nil {
 			return
