Skip to content

Commit 1707c4c

Browse files
committed
sql: use soft limit if available to decide on scan distribution
Release note (bug fix): In 25.1 we changed the physical planning heuristics so that large constrained table scans, estimated to scan at least 10k rows (controlled via `distribute_scan_row_count_threshold`), would force plan distribution in `distsql=auto` mode. However, if the scan had a "soft limit", we would still use the full estimate (e.g. for "estimated row count: 100 - 10,000" we would use 10k as the estimate), which could lead to distributing queries that shouldn't be distributed, increasing query latency. The new session variable `use_soft_limit_for_distribute_scan` (default `false`) determines whether the soft limit is used when deciding whether a scan is "large".
1 parent b168254 commit 1707c4c

File tree

8 files changed

+126
-6
lines changed

8 files changed

+126
-6
lines changed

pkg/sql/distsql_physical_planner.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -717,17 +717,27 @@ func checkSupportForPlanNode(
717717
// TODO(nvanbenschoten): lift this restriction.
718718
return cannotDistribute, cannotDistributeRowLevelLockingErr
719719
}
720-
721720
if n.localityOptimized {
722721
// This is a locality optimized scan.
723722
return cannotDistribute, localityOptimizedOpNotDistributableErr
724723
}
725-
// TODO(yuzefovich): consider using the soft limit in making a decision
726-
// here.
727724
scanRec := canDistribute
728-
if n.estimatedRowCount != 0 && n.estimatedRowCount >= sd.DistributeScanRowCountThreshold {
729-
log.VEventf(ctx, 2, "large scan recommends plan distribution")
730-
scanRec = shouldDistribute
725+
if n.estimatedRowCount != 0 {
726+
var suffix string
727+
estimate := n.estimatedRowCount
728+
if n.softLimit != 0 && sd.UseSoftLimitForDistributeScan {
729+
estimate = uint64(n.softLimit)
730+
suffix = " (using soft limit)"
731+
}
732+
if estimate >= sd.DistributeScanRowCountThreshold {
733+
log.VEventf(ctx, 2, "large scan recommends plan distribution%s", suffix)
734+
scanRec = shouldDistribute
735+
} else if n.softLimit != 0 && n.estimatedRowCount >= sd.DistributeScanRowCountThreshold {
736+
log.VEventf(
737+
ctx, 2, `estimated row count would consider the scan "large" `+
738+
`while soft limit hint makes it "small"`,
739+
)
740+
}
731741
}
732742
if n.isFull && (n.estimatedRowCount == 0 || sd.AlwaysDistributeFullScans) {
733743
// In the absence of table stats, we default to always distributing

pkg/sql/exec_util.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3645,6 +3645,10 @@ func (m *sessionDataMutator) SetAlwaysDistributeFullScans(val bool) {
36453645
m.data.AlwaysDistributeFullScans = val
36463646
}
36473647

3648+
func (m *sessionDataMutator) SetUseSoftLimitForDistributeScan(val bool) {
3649+
m.data.UseSoftLimitForDistributeScan = val
3650+
}
3651+
36483652
func (m *sessionDataMutator) SetDistributeJoinRowCountThreshold(val uint64) {
36493653
m.data.DistributeJoinRowCountThreshold = val
36503654
}

pkg/sql/logictest/testdata/logic_test/information_schema

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4162,6 +4162,7 @@ use_cputs_on_non_unique_indexes off
41624162
use_improved_routine_dependency_tracking on
41634163
use_pre_25_2_variadic_builtins off
41644164
use_proc_txn_control_extended_protocol_fix on
4165+
use_soft_limit_for_distribute_scan off
41654166
variable_inequality_lookup_join_enabled on
41664167
vector_search_beam_size 32
41674168
vector_search_rerank_multiplier 50

pkg/sql/logictest/testdata/logic_test/pg_catalog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3163,6 +3163,7 @@ use_declarative_schema_changer on
31633163
use_improved_routine_dependency_tracking on NULL NULL NULL string
31643164
use_pre_25_2_variadic_builtins off NULL NULL NULL string
31653165
use_proc_txn_control_extended_protocol_fix on NULL NULL NULL string
3166+
use_soft_limit_for_distribute_scan off NULL NULL NULL string
31663167
variable_inequality_lookup_join_enabled on NULL NULL NULL string
31673168
vector_search_beam_size 32 NULL NULL NULL string
31683169
vector_search_rerank_multiplier 50 NULL NULL NULL string
@@ -3405,6 +3406,7 @@ use_declarative_schema_changer on
34053406
use_improved_routine_dependency_tracking on NULL user NULL on on
34063407
use_pre_25_2_variadic_builtins off NULL user NULL off off
34073408
use_proc_txn_control_extended_protocol_fix on NULL user NULL on on
3409+
use_soft_limit_for_distribute_scan off NULL user NULL off off
34083410
variable_inequality_lookup_join_enabled on NULL user NULL on on
34093411
vector_search_beam_size 32 NULL user NULL 32 32
34103412
vector_search_rerank_multiplier 50 NULL user NULL 50 50
@@ -3639,6 +3641,7 @@ use_declarative_schema_changer NULL NULL
36393641
use_improved_routine_dependency_tracking NULL NULL NULL NULL NULL
36403642
use_pre_25_2_variadic_builtins NULL NULL NULL NULL NULL
36413643
use_proc_txn_control_extended_protocol_fix NULL NULL NULL NULL NULL
3644+
use_soft_limit_for_distribute_scan NULL NULL NULL NULL NULL
36423645
variable_inequality_lookup_join_enabled NULL NULL NULL NULL NULL
36433646
vector_search_beam_size NULL NULL NULL NULL NULL
36443647
vector_search_rerank_multiplier NULL NULL NULL NULL NULL

pkg/sql/logictest/testdata/logic_test/show_source

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ use_declarative_schema_changer on
253253
use_improved_routine_dependency_tracking on
254254
use_pre_25_2_variadic_builtins off
255255
use_proc_txn_control_extended_protocol_fix on
256+
use_soft_limit_for_distribute_scan off
256257
variable_inequality_lookup_join_enabled on
257258
vector_search_beam_size 32
258259
vector_search_rerank_multiplier 50

pkg/sql/opt/exec/execbuilder/testdata/distsql_auto_mode

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,3 +280,83 @@ query T
280280
SELECT info FROM [EXPLAIN SELECT a FROM abc INNER LOOKUP JOIN kv ON b = k] WHERE info LIKE 'distribution%'
281281
----
282282
distribution: full
283+
284+
subtest regression_152295
285+
286+
statement ok
287+
CREATE TABLE a (
288+
i INT PRIMARY KEY,
289+
j INT
290+
)
291+
292+
statement ok
293+
CREATE TABLE b (
294+
k INT PRIMARY KEY
295+
)
296+
297+
statement ok
298+
ALTER TABLE a SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
299+
300+
retry
301+
statement ok
302+
ALTER TABLE a EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
303+
304+
statement ok
305+
ALTER TABLE b SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
306+
307+
retry
308+
statement ok
309+
ALTER TABLE b EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
310+
311+
statement ok
312+
ALTER TABLE a INJECT STATISTICS '[
313+
{
314+
"columns": ["i"],
315+
"created_at": "2018-01-01 1:00:00.00000+00:00",
316+
"row_count": 100000,
317+
"distinct_count": 100000
318+
}
319+
]'
320+
321+
statement ok
322+
SET use_soft_limit_for_distribute_scan = true
323+
324+
# We choose to not distribute this query since the constrained scan has a soft
325+
# limit hint of 100 that is below the distribute scan threshold of 10k (even
326+
# though the "estimated row count" is 33,334).
327+
query T retry
328+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
329+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
330+
----
331+
distribution: local
332+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
333+
334+
statement ok
335+
SET distribute_scan_row_count_threshold = 10
336+
337+
# But now the soft limit hint exceeds the threshold - we should distribute.
338+
query T
339+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
340+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
341+
----
342+
distribution: full
343+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
344+
345+
statement ok
346+
RESET distribute_scan_row_count_threshold
347+
348+
statement ok
349+
SET use_soft_limit_for_distribute_scan = false
350+
351+
# Now we don't look at the soft limit hint - we should distribute.
352+
query T
353+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
354+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
355+
----
356+
distribution: full
357+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
358+
359+
statement ok
360+
RESET use_soft_limit_for_distribute_scan;
361+
362+
subtest end

pkg/sql/sessiondatapb/local_only_session_data.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,10 @@ message LocalOnlySessionData {
732732
// during query optimization. The names of the rules can be found in the
733733
// opt/rule_name.og.go file.
734734
repeated string disable_optimizer_rules = 184;
735+
// UseSoftLimitForDistributeScan, if set, means that we'll use the soft limit
736+
// hint - if available - when comparing against
737+
// DistributeScanRowCountThreshold.
738+
bool use_soft_limit_for_distribute_scan = 185;
735739

736740
///////////////////////////////////////////////////////////////////////////
737741
// WARNING: consider whether a session parameter you're adding needs to //

pkg/sql/vars.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,23 @@ var varGen = map[string]sessionVar{
737737
GlobalDefault: globalFalse,
738738
},
739739

740+
// CockroachDB extension.
741+
`use_soft_limit_for_distribute_scan`: {
742+
GetStringVal: makePostgresBoolGetStringValFn(`use_soft_limit_for_distribute_scan`),
743+
Set: func(_ context.Context, m sessionDataMutator, s string) error {
744+
b, err := paramparse.ParseBoolVar("use_soft_limit_for_distribute_scan", s)
745+
if err != nil {
746+
return err
747+
}
748+
m.SetUseSoftLimitForDistributeScan(b)
749+
return nil
750+
},
751+
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
752+
return formatBoolAsPostgresSetting(evalCtx.SessionData().UseSoftLimitForDistributeScan), nil
753+
},
754+
GlobalDefault: globalFalse,
755+
},
756+
740757
// CockroachDB extension.
741758
`distribute_join_row_count_threshold`: {
742759
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {

0 commit comments

Comments
 (0)