@@ -280,3 +280,83 @@ query T
280
280
SELECT info FROM [EXPLAIN SELECT a FROM abc INNER LOOKUP JOIN kv ON b = k] WHERE info LIKE 'distribution%'
281
281
----
282
282
distribution: full
283
+
284
+ subtest regression_152295
285
+
+ # Checks how use_soft_limit_for_distribute_scan affects the distribution
+ # decision reported by EXPLAIN for a LIMIT 1 lookup join between a and b.
286
+ statement ok
287
+ CREATE TABLE a (
288
+ i INT PRIMARY KEY,
289
+ j INT
290
+ )
291
+
292
+ statement ok
293
+ CREATE TABLE b (
294
+ k INT PRIMARY KEY
295
+ )
296
+
+ # Split a at keys 1 through 9, then relocate the range holding each key in
+ # 0..9 to target i%5+1 (values 1 through 5; presumably store IDs of a 5-node
+ # test cluster - TODO confirm against the test config).
297
+ statement ok
298
+ ALTER TABLE a SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
299
+
300
+ retry
301
+ statement ok
302
+ ALTER TABLE a EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
303
+
+ # Apply the same split points and relocation targets to b.
304
+ statement ok
305
+ ALTER TABLE b SPLIT AT SELECT i FROM generate_series(1, 9) AS g(i)
306
+
307
+ retry
308
+ statement ok
309
+ ALTER TABLE b EXPERIMENTAL_RELOCATE SELECT ARRAY[i%5+1], i FROM generate_series(0, 9) AS g(i)
310
+
+ # Inject statistics on a.i claiming 100,000 rows with 100,000 distinct values,
+ # so the row count estimates in the expected output below do not depend on
+ # actual table contents (no rows are ever inserted).
311
+ statement ok
312
+ ALTER TABLE a INJECT STATISTICS '[
313
+ {
314
+ "columns": ["i"],
315
+ "created_at": "2018-01-01 1:00:00.00000+00:00",
316
+ "row_count": 100000,
317
+ "distinct_count": 100000
318
+ }
319
+ ]'
320
+
+ # Enable the session setting under test.
321
+ statement ok
322
+ SET use_soft_limit_for_distribute_scan = true
323
+
324
+ # We choose not to distribute this query since the constrained scan has a soft
325
+ # limit hint of 100 that is below the distribute scan threshold of 10k (even
326
+ # though the "estimated row count" is 33,334).
+ # The WHERE filter keeps only the distribution line and the row count estimate
+ # from the EXPLAIN output.
327
+ query T retry
328
+ SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
329
+ WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
330
+ ----
331
+ distribution: local
332
+ estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
333
+
334
+ statement ok
335
+ SET distribute_scan_row_count_threshold = 10
336
+
337
+ # But now the soft limit hint (100) exceeds the threshold (10) - we should
+ # distribute. The row count estimate itself is unchanged.
338
+ query T
339
+ SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
340
+ WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
341
+ ----
342
+ distribution: full
343
+ estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
344
+
345
+ statement ok
345
+ RESET distribute_scan_row_count_threshold
346
+
347
+ statement ok
348
+ SET use_soft_limit_for_distribute_scan = false
349
+
350
+ # Now we don't look at the soft limit hint - we should distribute. With the
+ # threshold back at its default, the decision presumably falls back to the
+ # estimated row count of 33,334, which is above the 10k threshold.
351
+ query T
352
+ SELECT info FROM [EXPLAIN SELECT * FROM a INNER LOOKUP JOIN b ON k = j AND i < 10000 LIMIT 1]
353
+ WHERE info LIKE 'distribution%' OR info LIKE '%estimated row count%'
354
+ ----
355
+ distribution: full
356
+ estimated row count: 100 - 33,334 (33% of the table; stats collected <hidden> ago)
358
+
359
+ # Restore the session default for the setting toggled by this subtest.
+ statement ok
359
+ RESET use_soft_limit_for_distribute_scan;
360
+
361
+ subtest end
0 commit comments