11 changes: 4 additions & 7 deletions pkg/sql/distsql_physical_planner.go
@@ -2435,14 +2435,11 @@ func (dsp *DistSQLPlanner) planTableReaders(
 		ignoreMisplannedRanges bool
 		err                    error
 	)
+	sd := planCtx.ExtendedEvalCtx.SessionData()
 	if planCtx.isLocal {
 		spanPartitions, parallelizeLocal = dsp.maybeParallelizeLocalScans(ctx, planCtx, info)
-	} else if info.post.Limit == 0 {
-		// No hard limit - plan all table readers where their data live. Note
-		// that we're ignoring soft limits for now since the TableReader will
-		// still read too eagerly in the soft limit case. To prevent this we'll
-		// need a new mechanism on the execution side to modulate table reads.
-		// TODO(yuzefovich): add that mechanism.
+	} else if info.post.Limit == 0 && (info.spec.LimitHint == 0 || !sd.DistSQLPreventPartitioningSoftLimitedScans) {
+		// No limits - plan all table readers where their data live.
 		bound := PartitionSpansBoundDefault
 		if info.desc.NumFamilies() > 1 {
 			bound = PartitionSpansBoundCFWithinRow
@@ -2452,7 +2449,7 @@ func (dsp *DistSQLPlanner) planTableReaders(
 			return err
 		}
 	} else {
-		// If the scan has a hard limit, use a single TableReader to avoid
+		// If the scan has a hard or soft limit, use a single TableReader to avoid
 		// reading more rows than necessary.
 		sqlInstanceID, err := dsp.getInstanceIDForScan(ctx, planCtx, info.spans, info.reverse)
 		if err != nil {
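In plain terms, the rewritten branch chooses between partitioning the scan's spans across the nodes that hold the data and planning a single TableReader on one node. The following is a minimal, runnable Go sketch of just that decision, using simplified stand-in types rather than the planner's real ones and ignoring the local-planning branch; it illustrates the condition above and is not the actual implementation.

package main

import "fmt"

// scanInfo stands in for the fields the new condition consults: hardLimit
// plays the role of info.post.Limit and softLimit the role of
// info.spec.LimitHint (an optimizer hint that the scan may stop early).
type scanInfo struct {
	hardLimit uint64
	softLimit int64
}

// useSingleTableReader reports whether the scan should be planned as a single
// TableReader. Mirroring the diff: spans are partitioned only when there is no
// hard limit and either no soft limit or the session setting still permits
// partitioning soft-limited scans.
func useSingleTableReader(s scanInfo, preventPartitioningSoftLimitedScans bool) bool {
	partition := s.hardLimit == 0 &&
		(s.softLimit == 0 || !preventPartitioningSoftLimitedScans)
	return !partition
}

func main() {
	// A soft-limited scan is partitioned by default but not once the setting is on.
	fmt.Println(useSingleTableReader(scanInfo{softLimit: 10}, false)) // false
	fmt.Println(useSingleTableReader(scanInfo{softLimit: 10}, true))  // true
	// A hard-limited scan always gets a single TableReader.
	fmt.Println(useSingleTableReader(scanInfo{hardLimit: 10}, false)) // true
}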
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
@@ -4309,6 +4309,10 @@ func (m *sessionDataMutator) SetUseImprovedRoutineDepsTriggersAndComputedCols(va
m.data.UseImprovedRoutineDepsTriggersAndComputedCols = val
}

func (m *sessionDataMutator) SetDistSQLPreventPartitioningSoftLimitedScans(val bool) {
m.data.DistSQLPreventPartitioningSoftLimitedScans = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
@@ -3982,6 +3982,7 @@ distribute_join_row_count_threshold 1000
distribute_scan_row_count_threshold 10000
distribute_sort_row_count_threshold 1000
distsql_plan_gateway_bias 2
distsql_prevent_partitioning_soft_limited_scans off
distsql_use_reduced_leaf_write_sets on
enable_auto_rehoming off
enable_create_stats_using_extremes on
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
@@ -2986,6 +2986,7 @@ distribute_scan_row_count_threshold 10000
distribute_sort_row_count_threshold 1000 NULL NULL NULL string
distsql off NULL NULL NULL string
distsql_plan_gateway_bias 2 NULL NULL NULL string
distsql_prevent_partitioning_soft_limited_scans off NULL NULL NULL string
distsql_use_reduced_leaf_write_sets on NULL NULL NULL string
enable_auto_rehoming off NULL NULL NULL string
enable_create_stats_using_extremes on NULL NULL NULL string
@@ -3231,6 +3232,7 @@ distribute_scan_row_count_threshold 10000
distribute_sort_row_count_threshold 1000 NULL user NULL 1000 1000
distsql off NULL user NULL off off
distsql_plan_gateway_bias 2 NULL user NULL 2 2
distsql_prevent_partitioning_soft_limited_scans off NULL user NULL off off
distsql_use_reduced_leaf_write_sets on NULL user NULL on on
enable_auto_rehoming off NULL user NULL off off
enable_create_stats_using_extremes on NULL user NULL on on
@@ -3463,6 +3465,7 @@ distribute_scan_row_count_threshold NULL NULL
distribute_sort_row_count_threshold NULL NULL NULL NULL NULL
distsql NULL NULL NULL NULL NULL
distsql_plan_gateway_bias NULL NULL NULL NULL NULL
distsql_prevent_partitioning_soft_limited_scans NULL NULL NULL NULL NULL
distsql_use_reduced_leaf_write_sets NULL NULL NULL NULL NULL
distsql_workmem NULL NULL NULL NULL NULL
enable_auto_rehoming NULL NULL NULL NULL NULL
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
@@ -90,6 +90,7 @@ distribute_scan_row_count_threshold 10000
distribute_sort_row_count_threshold 1000
distsql off
distsql_plan_gateway_bias 2
distsql_prevent_partitioning_soft_limited_scans off
distsql_use_reduced_leaf_write_sets on
enable_auto_rehoming off
enable_create_stats_using_extremes on
160 changes: 160 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/distsql_scan
@@ -0,0 +1,160 @@
# LogicTest: 5node

# Regression test for lazy reading of soft-limited scans.

statement ok
CREATE TABLE abc (a INT PRIMARY KEY, b INT, c STRING, FAMILY (a, b, c))

statement ok
INSERT INTO abc SELECT i, i % 10, repeat('c', 16384) FROM generate_series(0, 99) AS s(i)

statement ok
ALTER TABLE abc SPLIT AT SELECT i * 20 FROM generate_series(1, 4) AS s(i)

retry
statement ok
ALTER TABLE abc EXPERIMENTAL_RELOCATE
SELECT ARRAY[i+1], i * 20 FROM generate_series(0, 4) as s(i)

# Verify data placement.
query TTTI colnames,rowsort
SELECT start_key, end_key, replicas, lease_holder from [SHOW RANGES FROM TABLE abc WITH DETAILS]
ORDER BY 1
----
start_key end_key replicas lease_holder
<before:/Table/72> …/1/20 {1} 1
…/1/20 …/1/40 {2} 2
…/1/40 …/1/60 {3} 3
…/1/60 …/1/80 {4} 4
…/1/80 <after:/Max> {5} 5

statement ok
ANALYZE abc

query T
EXPLAIN ANALYZE (DISTSQL) SELECT a FROM abc WHERE a >= 0 AND a < 100 ORDER BY a LIMIT 10
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: custom
rows decoded from KV: 10 (80 B, 20 KVs, 10 gRPC calls)
maximum memory usage: <hidden>
DistSQL network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• scan
sql nodes: <hidden>
kv nodes: <hidden>
regions: <hidden>
actual row count: 10
KV time: 0µs
KV rows decoded: 10
KV pairs read: 20
KV bytes read: 80 B
KV gRPC calls: 10
estimated max memory allocated: 0 B
estimated row count: 10 (10% of the table; stats collected <hidden> ago)
table: abc@abc_pkey
spans: [/0 - /99]
limit: 10
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJyMktFq20wQhe__pxjmKgEFr_7elIVCG9ulpnYcZJNSWhNGq4mzWNKquyNsY_xYfYE-WVnJTtrSQPdCaM5-e-asRgcM30rUuBhPx8MlELzP5jOg3MCnD-NsDBcEX1ulXvEbUJfw7mb0pBhIlbqEeTYaZ3D9GQimk9lkCanCBGtX8A1VHFB_wRRXCTbeGQ7B-SgdOmBS7FCrBG3dtBLlVYLGeUZ9QLFSMmpcUl5yxlSwH0TjgoVs2dlSbt5Sbu6bDe8xwaEr26oOGggTXDQUXwfqapCqeG5qKyt9to93ELsHDXXal57X1kVeOEgvia1Yg_rxPZwQtw1QsHEFF_rJJ98LB_BMhYbXCq57dZ3dDsFQWYZnsiHrz-T_UZvdDYcQhBswrq0FLngnA1vLpQY1eAaYNy8BFe2g4sr5PVBZOkMSo_UpchLzyAFcK00rGiLfXeEspApXxwT78vTtg9CaUae_DGsyQq2Oyb_PK-PQuDrwb6N6qZP6o9NVelwlyMWa-58kuNYbvvXOdGxfzjujTig4SL-b9sWkPm8F8UxVH3-V4EPptve2QI3qtK7-8jgvjAdoHeLFFo9u29ku902M9UBl4ARntOERC_vK1jaINajFt3w8_vczAAD__xOuBa4=

query T
EXPLAIN ANALYZE (DISTSQL) SELECT a FROM abc WHERE a >= 0 AND a < 100 AND b != 5 ORDER BY a LIMIT 10
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: custom
rows decoded from KV: 60 (480 B, 120 KVs, 60 gRPC calls)
maximum memory usage: <hidden>
DistSQL network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• limit
│ count: 10
└── • filter
│ sql nodes: <hidden>
│ regions: <hidden>
│ actual row count: 50
│ execution time: 0µs
│ estimated row count: 90
│ filter: b != 5
└── • scan
sql nodes: <hidden>
kv nodes: <hidden>
regions: <hidden>
actual row count: 60
KV time: 0µs
KV rows decoded: 60
KV pairs read: 120
KV bytes read: 480 B
KV gRPC calls: 60
estimated max memory allocated: 0 B
estimated row count: 12 - 100 (100% of the table; stats collected <hidden> ago)
table: abc@abc_pkey
spans: [/0 - /99]
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJzsV9tu4zYQfe9XTOcpQWWIlC-xCSyQruNFjeaycIwtijZY0NKsV4gkqiRdxw38Wf2BflkhKfJa2ti1AhQIsNaDAQ6HwzlnzpD0I5o_IhR4O7ocDacg4d3k5grkzIdffhpNRnByIuH3BWNtegPsFH68voDS4gNn7PTJNoPv30D3FG4mF6MJvP0VJFyOr8ZT4AwdTFRA1zImg-I35Oighw620cEOOtjFOwdTrXwyRunM5TFfMA4eUDAHwyRd2Mx856CvNKF4RBvaiFDgVM4impAMSLvZRgFZGUb5NnLmn8uZ_zG9pxU6OFTRIk6MAOnADB28TWU2clnL9bKVP3-AbFMjIOHFUNM8VJmPJWMLkw1jEsD--ds8uailgYB8FVAggHuFdbayZECTDAQMevC2sM4n74fgyygyXzxTGerS08u4uPowHIKxlIKvFomFE3qwbpjYUwEsB1g4EN3vcojlA8QUK70CGUXKlzZLjeVZzKT1P5MBtbDpwgrI_HMIpYF7eLd2sBg-UW6snBMKvlWj8QUKtnYOL9O7MLKkSbvdao0Ku4BzL5ePEGJ8Pe2jg5dhHNpCO_RA_sKGKqmy_99Y2E4sXg1LdyeWLxCUDkhTUM3_nP-Ad-tnAF-rlkpdXpPkTZbXOf_f8bVr-Hi1WPzwnuIv6imPtdzOsamaNxVvUqdNU_VeZVP1Kli8wzXnvUhzHdZye0fNNdec16ROG82dvUrNnVWwtA_XXPtFmuuxlts_aq655tpN6rTRXP9Vaq5fwdI5XHOdF2muz1ouZ0fRNRddp0mhNqIbvErRDZq8vidkUpUYqiDZtROr7dTi2fuWgjkV72GjFtqn91r5uW8xvMkD5YaAjC1meTEYJ-WUsZpkvPnzsB2J743kVSLx7UjdeiRvf05NkmrvDdXZHYnXI3WawpN5VTAhu1T6HiJpKfFXGymV9qUMbVVkARnSoYzCv-TXCiyX5arT5FP4Z34KsK258igoJ_tFI5bTMRkj5xWP7qHa3qaoV6eou5ei3m6yvXqk3pHsGtlndYrO9lLU3012ux6pfyS7Rna_TtFg_4nEdrPd-eqY3H_ifot0D7Lr6VOklh_DAAWyp6_1zE_5YbZAzk12R95-Vsucr-kqzW64TzIy5OCVvKcLsqTjMAmNDX0UVi9ovf7u3wAAAP__qpTOfA==

statement ok
SET distsql_prevent_partitioning_soft_limited_scans = on

query T
EXPLAIN ANALYZE (DISTSQL) SELECT a FROM abc WHERE a >= 0 AND a < 100 AND b != 5 ORDER BY a LIMIT 10
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: generic, reused
rows decoded from KV: 12 (96 B, 24 KVs, 12 gRPC calls)
maximum memory usage: <hidden>
DistSQL network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• limit
│ count: 10
└── • filter
│ sql nodes: <hidden>
│ regions: <hidden>
│ actual row count: 10
│ execution time: 0µs
│ estimated row count: 90
│ filter: b != 5
└── • scan
sql nodes: <hidden>
kv nodes: <hidden>
regions: <hidden>
actual row count: 12
KV time: 0µs
KV rows decoded: 12
KV pairs read: 24
KV bytes read: 96 B
KV gRPC calls: 12
estimated max memory allocated: 0 B
estimated row count: 12 - 100 (100% of the table; stats collected <hidden> ago)
table: abc@abc_pkey
spans: [/0 - /99]
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJyUUt1q20wQvf-eYr65imGDV-4P7UIgje1Q0zgOikkprQmr1cRZImnV3RGxCX6svkCfrKxkt0lo0lYXgjl79sycPXOH4WuBCs_HJ-PhHDQcp7Mp6MzAx_fjdAx7exq-NFK-oAOQPXh3OoIdYiCRsrfFMvj_AF71YJaOxikcfQINJ5PpZA6JRIGVy-lUlxRQfcYEFwJr7wyF4HyE7lrCJF-hkgJtVTcc4YVA4zyhukO2XBAqnOusoJR0Tr4fhXNibYtWVmfmUGfmsr6hNQocuqIpq6BAC8hQ4HmtY9WX-_1ExqsfLiB2DQqqpCs9La2LJKbAHcS2JAXy-7ewpbjbADkZl1OuIBl0aLZmCuBJ5wrevoajDl2mZ0MwuijCL2atrd8xBy9R4PRiOITAVINxTcWwRyvu24p7CmTrsCMQ3TxFKPUKSiqdX4MuCmc0x9FkO0Wm2VxTANdw3bCCyG8t7IBkgIuNwK7cvnlgvSRUyb2QJiNUciP-PqdjWzB58v3kYUgdruBw0O6LUmpyOn-DAmdxmsPIPrGl5W5taEWmYeuqhzn82ZV80tXgkavkX1ylFGpXBXrg6alO8lGn_WSzEEj5krqVD67xhs68My23K2etUAvkFLg7TbpiUu2OAnvS5c9Q7islzyoNnlNaCLwq3O2lzVGh3H77v_ntPowX9DLEJzq_dret7HxdR4NXuggkcKpvaERMvrSVDWwNKvYNbTb__QgAAP__DIZi7A==

statement ok
RESET distsql_prevent_partitioning_soft_limited_scans
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/tests/5node/BUILD.bazel
@@ -12,7 +12,7 @@ go_test(
         "//build/toolchains:is_heavy": {"test.Pool": "heavy"},
         "//conditions:default": {"test.Pool": "large"},
     }),
-    shard_count = 28,
+    shard_count = 29,
     tags = ["cpu:3"],
     deps = [
         "//pkg/base",
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/tests/5node/generated_test.go

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions pkg/sql/sessiondatapb/local_only_session_data.proto
@@ -743,6 +743,10 @@ message LocalOnlySessionData {
// triggers or computed columns. The fix applies at routine creation time and
// prevents unnecessary column dependencies from being recorded.
bool use_improved_routine_deps_triggers_and_computed_cols = 196;
// DistSQLPreventPartitioningSoftLimitedScans, when true, prevents the distsql
// physical planner from partitioning scans with soft limits into multiple
// TableReaders, matching the existing behavior for scans with hard limits.
bool distsql_prevent_partitioning_soft_limited_scans = 197 [(gogoproto.customname) = "DistSQLPreventPartitioningSoftLimitedScans"];

///////////////////////////////////////////////////////////////////////////
// WARNING: consider whether a session parameter you're adding needs to //
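The new field is a session setting that defaults to off. A short usage sketch, mirroring the logic test added above (abc is the table created there), shows how a session opts in:

-- Check the current value; the default is off.
SHOW distsql_prevent_partitioning_soft_limited_scans;

-- Opt in for this session: a scan that only carries a soft limit (for
-- example a LIMIT above a filter, where the scan cannot know in advance how
-- many rows it must produce) is now planned as a single TableReader.
SET distsql_prevent_partitioning_soft_limited_scans = on;

SELECT a FROM abc WHERE a >= 0 AND a < 100 AND b != 5 ORDER BY a LIMIT 10;

-- Restore the default.
RESET distsql_prevent_partitioning_soft_limited_scans;

A plain LIMIT with no filter already pushes a hard limit down to the scan, so, as the first EXPLAIN ANALYZE above shows, such queries read only the limited number of rows regardless of this setting.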
17 changes: 17 additions & 0 deletions pkg/sql/vars.go
@@ -4389,6 +4389,23 @@ var varGen = map[string]sessionVar{
},
GlobalDefault: globalFalse,
},

// CockroachDB extension.
`distsql_prevent_partitioning_soft_limited_scans`: {
GetStringVal: makePostgresBoolGetStringValFn(`distsql_prevent_partitioning_soft_limited_scans`),
Set: func(_ context.Context, m sessionDataMutator, s string) error {
b, err := paramparse.ParseBoolVar("distsql_prevent_partitioning_soft_limited_scans", s)
if err != nil {
return err
}
m.SetDistSQLPreventPartitioningSoftLimitedScans(b)
return nil
},
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
return formatBoolAsPostgresSetting(evalCtx.SessionData().DistSQLPreventPartitioningSoftLimitedScans), nil
},
GlobalDefault: globalFalse,
},
}

func ReplicationModeFromString(s string) (sessiondatapb.ReplicationMode, error) {