Skip to content

Commit 59317ae

Browse files
authored
Merge pull request #152556 from yuzefovich/blathers/backport-release-25.3-152300
release-25.3: sql: use soft limit if available to decide on scan distribution
2 parents 9206427 + 891c31c commit 59317ae

File tree

8 files changed, with 126 additions and 6 deletions.

pkg/sql/distsql_physical_planner.go

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -717,17 +717,27 @@ func checkSupportForPlanNode(
717717
// TODO(nvanbenschoten): lift this restriction.
718718
return cannotDistribute, cannotDistributeRowLevelLockingErr
719719
}
720-
721720
if n.localityOptimized {
722721
// This is a locality optimized scan.
723722
return cannotDistribute, localityOptimizedOpNotDistributableErr
724723
}
725-
// TODO(yuzefovich): consider using the soft limit in making a decision
726-
// here.
727724
scanRec := canDistribute
728-
if n.estimatedRowCount != 0 && n.estimatedRowCount >= sd.DistributeScanRowCountThreshold {
729-
log.VEventf(ctx, 2, "large scan recommends plan distribution")
730-
scanRec = shouldDistribute
725+
if n.estimatedRowCount != 0 {
726+
var suffix string
727+
estimate := n.estimatedRowCount
728+
if n.softLimit != 0 && sd.UseSoftLimitForDistributeScan {
729+
estimate = uint64(n.softLimit)
730+
suffix = " (using soft limit)"
731+
}
732+
if estimate >= sd.DistributeScanRowCountThreshold {
733+
log.VEventf(ctx, 2, "large scan recommends plan distribution%s", suffix)
734+
scanRec = shouldDistribute
735+
} else if n.softLimit != 0 && n.estimatedRowCount >= sd.DistributeScanRowCountThreshold {
736+
log.VEventf(
737+
ctx, 2, `estimated row count would consider the scan "large" `+
738+
`while soft limit hint makes it "small"`,
739+
)
740+
}
731741
}
732742
if n.isFull && (n.estimatedRowCount == 0 || sd.AlwaysDistributeFullScans) {
733743
// In the absence of table stats, we default to always distributing

pkg/sql/exec_util.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3521,6 +3521,10 @@ func (m *sessionDataMutator) SetAlwaysDistributeFullScans(val bool) {
35213521
m.data.AlwaysDistributeFullScans = val
35223522
}
35233523

3524+
func (m *sessionDataMutator) SetUseSoftLimitForDistributeScan(val bool) {
3525+
m.data.UseSoftLimitForDistributeScan = val
3526+
}
3527+
35243528
func (m *sessionDataMutator) SetDistributeJoinRowCountThreshold(val uint64) {
35253529
m.data.DistributeJoinRowCountThreshold = val
35263530
}

pkg/sql/logictest/testdata/logic_test/information_schema

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4143,6 +4143,7 @@ use_cputs_on_non_unique_indexes off
41434143
use_improved_routine_dependency_tracking on
41444144
use_pre_25_2_variadic_builtins off
41454145
use_proc_txn_control_extended_protocol_fix on
4146+
use_soft_limit_for_distribute_scan off
41464147
variable_inequality_lookup_join_enabled on
41474148
vector_search_beam_size 32
41484149
vector_search_rerank_multiplier 50

pkg/sql/logictest/testdata/logic_test/pg_catalog

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3148,6 +3148,7 @@ use_declarative_schema_changer on
31483148
use_improved_routine_dependency_tracking on NULL NULL NULL string
31493149
use_pre_25_2_variadic_builtins off NULL NULL NULL string
31503150
use_proc_txn_control_extended_protocol_fix on NULL NULL NULL string
3151+
use_soft_limit_for_distribute_scan off NULL NULL NULL string
31513152
variable_inequality_lookup_join_enabled on NULL NULL NULL string
31523153
vector_search_beam_size 32 NULL NULL NULL string
31533154
vector_search_rerank_multiplier 50 NULL NULL NULL string
@@ -3389,6 +3390,7 @@ use_declarative_schema_changer on
33893390
use_improved_routine_dependency_tracking on NULL user NULL on on
33903391
use_pre_25_2_variadic_builtins off NULL user NULL off off
33913392
use_proc_txn_control_extended_protocol_fix on NULL user NULL on on
3393+
use_soft_limit_for_distribute_scan off NULL user NULL off off
33923394
variable_inequality_lookup_join_enabled on NULL user NULL on on
33933395
vector_search_beam_size 32 NULL user NULL 32 32
33943396
vector_search_rerank_multiplier 50 NULL user NULL 50 50
@@ -3622,6 +3624,7 @@ use_declarative_schema_changer NULL NULL
36223624
use_improved_routine_dependency_tracking NULL NULL NULL NULL NULL
36233625
use_pre_25_2_variadic_builtins NULL NULL NULL NULL NULL
36243626
use_proc_txn_control_extended_protocol_fix NULL NULL NULL NULL NULL
3627+
use_soft_limit_for_distribute_scan NULL NULL NULL NULL NULL
36253628
variable_inequality_lookup_join_enabled NULL NULL NULL NULL NULL
36263629
vector_search_beam_size NULL NULL NULL NULL NULL
36273630
vector_search_rerank_multiplier NULL NULL NULL NULL NULL

pkg/sql/logictest/testdata/logic_test/show_source

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,7 @@ use_declarative_schema_changer on
252252
use_improved_routine_dependency_tracking on
253253
use_pre_25_2_variadic_builtins off
254254
use_proc_txn_control_extended_protocol_fix on
255+
use_soft_limit_for_distribute_scan off
255256
variable_inequality_lookup_join_enabled on
256257
vector_search_beam_size 32
257258
vector_search_rerank_multiplier 50

pkg/sql/opt/exec/execbuilder/testdata/distsql_auto_mode

Lines changed: 80 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -280,3 +280,83 @@ query T
280280
SELECT info FROM [EXPLAIN SELECT a FROM abc INNER LOOKUP JOIN kv ON b = k] WHERE info LIKE 'distribution%'
281281
----
282282
distribution: full
283+
284+
subtest regression_152295
285+
286+
statement ok
287+
CREATE TABLE a (
288+
i INT PRIMARY KEY,
289+
j INT
290+
)
291+
292+
statement ok
293+
CREATE TABLE b (
294+
k INT PRIMARY KEY
295+
)
296+
297+
statement ok
298+
ALTER TABLE a SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
299+
300+
retry
301+
statement ok
302+
ALTER TABLE a EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
303+
304+
statement ok
305+
ALTER TABLE b SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
306+
307+
retry
308+
statement ok
309+
ALTER TABLE b EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
310+
311+
statement ok
312+
ALTER TABLE a INJECT STATISTICS '[
313+
{
314+
"columns": ["i"],
315+
"created_at": "2018-01-01 1:00:00.00000+00:00",
316+
"row_count": 100000,
317+
"distinct_count": 100000
318+
}
319+
]'
320+
321+
statement ok
322+
SET use_soft_limit_for_distribute_scan = true
323+
324+
# We choose to not distribute this query since the constrained scan has a soft
325+
# limit hint of 100 that is below the distribute scan threshold of 10k (even
326+
# though the "estimated row count" is 33,334).
327+
query T retry
328+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
329+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
330+
----
331+
distribution: local
332+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
333+
334+
statement ok
335+
SET distribute_scan_row_count_threshold = 10
336+
337+
# But now the soft limit hint exceeds the threshold - we should distribute.
338+
query T
339+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
340+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
341+
----
342+
distribution: full
343+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
344+
345+
statement ok
346+
RESET distribute_scan_row_count_threshold
347+
348+
statement ok
349+
SET use_soft_limit_for_distribute_scan = false
350+
351+
# Now we don't look at the soft limit hint - we should distribute.
352+
query T
353+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
354+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
355+
----
356+
distribution: full
357+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
358+
359+
statement ok
360+
RESET use_soft_limit_for_distribute_scan;
361+
362+
subtest end

pkg/sql/sessiondatapb/local_only_session_data.proto

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -727,6 +727,10 @@ message LocalOnlySessionData {
727727
// during query optimization. The names of the rules can be found in the
728728
// opt/rule_name.og.go file.
729729
repeated string disable_optimizer_rules = 184;
730+
// UseSoftLimitForDistributeScan, if set, means that we'll use the soft limit
731+
// hint - if available - when comparing against
732+
// DistributeScanRowCountThreshold.
733+
bool use_soft_limit_for_distribute_scan = 185;
730734

731735
///////////////////////////////////////////////////////////////////////////
732736
// WARNING: consider whether a session parameter you're adding needs to //

pkg/sql/vars.go

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -737,6 +737,23 @@ var varGen = map[string]sessionVar{
737737
GlobalDefault: globalFalse,
738738
},
739739

740+
// CockroachDB extension.
741+
`use_soft_limit_for_distribute_scan`: {
742+
GetStringVal: makePostgresBoolGetStringValFn(`use_soft_limit_for_distribute_scan`),
743+
Set: func(_ context.Context, m sessionDataMutator, s string) error {
744+
b, err := paramparse.ParseBoolVar("use_soft_limit_for_distribute_scan", s)
745+
if err != nil {
746+
return err
747+
}
748+
m.SetUseSoftLimitForDistributeScan(b)
749+
return nil
750+
},
751+
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
752+
return formatBoolAsPostgresSetting(evalCtx.SessionData().UseSoftLimitForDistributeScan), nil
753+
},
754+
GlobalDefault: globalFalse,
755+
},
756+
740757
// CockroachDB extension.
741758
`distribute_join_row_count_threshold`: {
742759
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {

0 commit comments

Comments
 (0)