Skip to content

Commit 1b6ab61

Browse files
authored
Merge pull request #152559 from yuzefovich/backport25.2-152300
release-25.2: sql: use soft limit if available to decide on scan distribution
2 parents 9cb19ca + 3e6c3a8 commit 1b6ab61

File tree

8 files changed, with 126 additions and 6 deletions.

pkg/sql/distsql_physical_planner.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -713,17 +713,27 @@ func checkSupportForPlanNode(
713713
// TODO(nvanbenschoten): lift this restriction.
714714
return cannotDistribute, cannotDistributeRowLevelLockingErr
715715
}
716-
717716
if n.localityOptimized {
718717
// This is a locality optimized scan.
719718
return cannotDistribute, localityOptimizedOpNotDistributableErr
720719
}
721-
// TODO(yuzefovich): consider using the soft limit in making a decision
722-
// here.
723720
scanRec := canDistribute
724-
if n.estimatedRowCount != 0 && n.estimatedRowCount >= sd.DistributeScanRowCountThreshold {
725-
log.VEventf(ctx, 2, "large scan recommends plan distribution")
726-
scanRec = shouldDistribute
721+
if n.estimatedRowCount != 0 {
722+
var suffix string
723+
estimate := n.estimatedRowCount
724+
if n.softLimit != 0 && sd.UseSoftLimitForDistributeScan {
725+
estimate = uint64(n.softLimit)
726+
suffix = " (using soft limit)"
727+
}
728+
if estimate >= sd.DistributeScanRowCountThreshold {
729+
log.VEventf(ctx, 2, "large scan recommends plan distribution%s", suffix)
730+
scanRec = shouldDistribute
731+
} else if n.softLimit != 0 && n.estimatedRowCount >= sd.DistributeScanRowCountThreshold {
732+
log.VEventf(
733+
ctx, 2, `estimated row count would consider the scan "large" `+
734+
`while soft limit hint makes it "small"`,
735+
)
736+
}
727737
}
728738
if n.isFull && (n.estimatedRowCount == 0 || sd.AlwaysDistributeFullScans) {
729739
// In the absence of table stats, we default to always distributing

pkg/sql/exec_util.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3478,6 +3478,10 @@ func (m *sessionDataMutator) SetAlwaysDistributeFullScans(val bool) {
34783478
m.data.AlwaysDistributeFullScans = val
34793479
}
34803480

3481+
func (m *sessionDataMutator) SetUseSoftLimitForDistributeScan(val bool) {
3482+
m.data.UseSoftLimitForDistributeScan = val
3483+
}
3484+
34813485
func (m *sessionDataMutator) SetDistributeJoinRowCountThreshold(val uint64) {
34823486
m.data.DistributeJoinRowCountThreshold = val
34833487
}

pkg/sql/logictest/testdata/logic_test/information_schema

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4108,6 +4108,7 @@ unsafe_allow_triggers_modifying_cascades off
41084108
use_cputs_on_non_unique_indexes off
41094109
use_pre_25_2_variadic_builtins off
41104110
use_proc_txn_control_extended_protocol_fix off
4111+
use_soft_limit_for_distribute_scan off
41114112
variable_inequality_lookup_join_enabled on
41124113
vector_search_beam_size 32
41134114
xmloption content

pkg/sql/logictest/testdata/logic_test/pg_catalog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3119,6 +3119,7 @@ use_cputs_on_non_unique_indexes off N
31193119
use_declarative_schema_changer on NULL NULL NULL string
31203120
use_pre_25_2_variadic_builtins off NULL NULL NULL string
31213121
use_proc_txn_control_extended_protocol_fix off NULL NULL NULL string
3122+
use_soft_limit_for_distribute_scan off NULL NULL NULL string
31223123
variable_inequality_lookup_join_enabled on NULL NULL NULL string
31233124
vector_search_beam_size 32 NULL NULL NULL string
31243125
vectorize on NULL NULL NULL string
@@ -3343,6 +3344,7 @@ use_cputs_on_non_unique_indexes off N
33433344
use_declarative_schema_changer on NULL user NULL on on
33443345
use_pre_25_2_variadic_builtins off NULL user NULL off off
33453346
use_proc_txn_control_extended_protocol_fix off NULL user NULL off off
3347+
use_soft_limit_for_distribute_scan off NULL user NULL off off
33463348
variable_inequality_lookup_join_enabled on NULL user NULL on on
33473349
vector_search_beam_size 32 NULL user NULL 32 32
33483350
vectorize on NULL user NULL on on
@@ -3567,6 +3569,7 @@ use_cputs_on_non_unique_indexes NULL NULL NULL
35673569
use_declarative_schema_changer NULL NULL NULL NULL NULL
35683570
use_pre_25_2_variadic_builtins NULL NULL NULL NULL NULL
35693571
use_proc_txn_control_extended_protocol_fix NULL NULL NULL NULL NULL
3572+
use_soft_limit_for_distribute_scan NULL NULL NULL NULL NULL
35703573
variable_inequality_lookup_join_enabled NULL NULL NULL NULL NULL
35713574
vector_search_beam_size NULL NULL NULL NULL NULL
35723575
vectorize NULL NULL NULL NULL NULL

pkg/sql/logictest/testdata/logic_test/show_source

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ use_cputs_on_non_unique_indexes off
235235
use_declarative_schema_changer on
236236
use_pre_25_2_variadic_builtins off
237237
use_proc_txn_control_extended_protocol_fix off
238+
use_soft_limit_for_distribute_scan off
238239
variable_inequality_lookup_join_enabled on
239240
vector_search_beam_size 32
240241
vectorize on

pkg/sql/opt/exec/execbuilder/testdata/distsql_auto_mode

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,3 +280,83 @@ query T
280280
SELECT info FROM [EXPLAIN SELECT a FROM abc INNER LOOKUP JOIN kv ON b = k] WHERE info LIKE 'distribution%'
281281
----
282282
distribution: full
283+
284+
subtest regression_152295
285+
286+
statement ok
287+
CREATE TABLE a (
288+
i INT PRIMARY KEY,
289+
j INT
290+
)
291+
292+
statement ok
293+
CREATE TABLE b (
294+
k INT PRIMARY KEY
295+
)
296+
297+
statement ok
298+
ALTER TABLE a SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
299+
300+
retry
301+
statement ok
302+
ALTER TABLE a EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
303+
304+
statement ok
305+
ALTER TABLE b SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
306+
307+
retry
308+
statement ok
309+
ALTER TABLE b EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
310+
311+
statement ok
312+
ALTER TABLE a INJECT STATISTICS '[
313+
{
314+
"columns": ["i"],
315+
"created_at": "2018-01-01 1:00:00.00000+00:00",
316+
"row_count": 100000,
317+
"distinct_count": 100000
318+
}
319+
]'
320+
321+
statement ok
322+
SET use_soft_limit_for_distribute_scan = true
323+
324+
# We choose to not distribute this query since the constrained scan has a soft
325+
# limit hint of 100 that is below the distribute scan threshold of 10k (even
326+
# though the "estimated row count" is 33,334).
327+
query T retry
328+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
329+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
330+
----
331+
distribution: local
332+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
333+
334+
statement ok
335+
SET distribute_scan_row_count_threshold = 10
336+
337+
# But now the soft limit hint exceeds the threshold - we should distribute.
338+
query T
339+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
340+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
341+
----
342+
distribution: full
343+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
344+
345+
statement ok
346+
RESET distribute_scan_row_count_threshold
347+
348+
statement ok
349+
SET use_soft_limit_for_distribute_scan = false
350+
351+
# Now we don't look at the soft limit hint - we should distribute.
352+
query T
353+
SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
354+
WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
355+
----
356+
distribution: full
357+
estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
358+
359+
statement ok
360+
RESET use_soft_limit_for_distribute_scan;
361+
362+
subtest end

pkg/sql/sessiondatapb/local_only_session_data.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,10 @@ message LocalOnlySessionData {
683683
// during query optimization. The names of the rules can be found in the
684684
// opt/rule_name.og.go file.
685685
repeated string disable_optimizer_rules = 184;
686+
// UseSoftLimitForDistributeScan, if set, means that we'll use the soft limit
687+
// hint - if available - when comparing against
688+
// DistributeScanRowCountThreshold.
689+
bool use_soft_limit_for_distribute_scan = 185;
686690

687691
///////////////////////////////////////////////////////////////////////////
688692
// WARNING: consider whether a session parameter you're adding needs to //

pkg/sql/vars.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,23 @@ var varGen = map[string]sessionVar{
725725
GlobalDefault: globalFalse,
726726
},
727727

728+
// CockroachDB extension.
729+
`use_soft_limit_for_distribute_scan`: {
730+
GetStringVal: makePostgresBoolGetStringValFn(`use_soft_limit_for_distribute_scan`),
731+
Set: func(_ context.Context, m sessionDataMutator, s string) error {
732+
b, err := paramparse.ParseBoolVar("use_soft_limit_for_distribute_scan", s)
733+
if err != nil {
734+
return err
735+
}
736+
m.SetUseSoftLimitForDistributeScan(b)
737+
return nil
738+
},
739+
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
740+
return formatBoolAsPostgresSetting(evalCtx.SessionData().UseSoftLimitForDistributeScan), nil
741+
},
742+
GlobalDefault: globalFalse,
743+
},
744+
728745
// CockroachDB extension.
729746
`distribute_join_row_count_threshold`: {
730747
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {

0 commit comments

Comments
 (0)