@@ -2,10 +2,15 @@ exec-ddl
2
2
CREATE TABLE t (
3
3
k INT PRIMARY KEY,
4
4
i INT,
5
- s STRING
5
+ s STRING,
6
+ INDEX (i)
6
7
)
7
8
----
8
9
10
+ # ------------------------
11
+ # Tests without Histograms
12
+ # ------------------------
13
+
9
14
exec-ddl
10
15
ALTER TABLE t INJECT STATISTICS '[
11
16
{
@@ -166,3 +171,295 @@ select
166
171
│ └── fd: (1)-->(2,3)
167
172
└── filters
168
173
└── (i:2 = $1) OR (s:3 = $2) [type=bool, outer=(2,3)]
174
+
175
+ # ---------------------
176
+ # Tests with Histograms
177
+ # ---------------------
178
+
179
+ exec-ddl
180
+ ALTER TABLE t INJECT STATISTICS '[
181
+ {
182
+ "columns": ["k"],
183
+ "created_at": "2018-01-01 1:00:00.00000+00:00",
184
+ "row_count": 1000,
185
+ "distinct_count": 1000
186
+ },
187
+ {
188
+ "columns": ["i"],
189
+ "created_at": "2018-01-01 1:00:00.00000+00:00",
190
+ "row_count": 1000,
191
+ "distinct_count": 41,
192
+ "null_count": 30,
193
+ "avg_size": 2,
194
+ "histo_col_type": "int",
195
+ "histo_buckets": [
196
+ {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"},
197
+ {"num_eq": 10, "num_range": 90, "distinct_range": 9, "upper_bound": "100"},
198
+ {"num_eq": 10, "num_range": 180, "distinct_range": 9, "upper_bound": "200"},
199
+ {"num_eq": 20, "num_range": 270, "distinct_range": 9, "upper_bound": "300"},
200
+ {"num_eq": 30, "num_range": 360, "distinct_range": 9, "upper_bound": "400"}
201
+ ]
202
+ },
203
+ {
204
+ "columns": ["s"],
205
+ "created_at": "2018-01-01 1:00:00.00000+00:00",
206
+ "row_count": 1000,
207
+ "distinct_count": 20,
208
+ "avg_size": 3,
209
+ "histo_col_type": "string",
210
+ "histo_buckets": [
211
+ {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "apple"},
212
+ {"num_eq": 300, "num_range": 100, "distinct_range": 9, "upper_bound": "banana"},
213
+ {"num_eq": 500, "num_range": 100, "distinct_range": 9, "upper_bound": "cherry"}
214
+ ]
215
+ }
216
+ ]'
217
+ ----
218
+
219
+ norm
220
+ SELECT * FROM t WHERE k = $1
221
+ ----
222
+ select
223
+ ├── columns: k:1(int!null) i:2(int) s:3(string)
224
+ ├── cardinality: [0 - 1]
225
+ ├── has-placeholder
226
+ ├── stats: [rows=1, distinct(1)=1, null(1)=0]
227
+ ├── key: ()
228
+ ├── fd: ()-->(1-3)
229
+ ├── scan t
230
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
231
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
232
+ │ ├── key: (1)
233
+ │ └── fd: (1)-->(2,3)
234
+ └── filters
235
+ └── k:1 = $1 [type=bool, outer=(1), constraints=(/1: (/NULL - ]), fd=()-->(1)]
236
+
237
+ # The row count of the filter is the max frequency of i's histogram.
238
+ norm
239
+ SELECT * FROM t WHERE i = $1
240
+ ----
241
+ select
242
+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
243
+ ├── has-placeholder
244
+ ├── stats: [rows=30, distinct(2)=1, null(2)=0]
245
+ ├── key: (1)
246
+ ├── fd: ()-->(2), (1)-->(3)
247
+ ├── scan t
248
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
249
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30]
250
+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
251
+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
252
+ │ ├── key: (1)
253
+ │ └── fd: (1)-->(2,3)
254
+ └── filters
255
+ └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)]
256
+
257
+ # Similar case as above, but with opt to ensure the correct row counts are used
258
+ # for new memo groups.
259
+ opt
260
+ SELECT k FROM t WHERE i = $1
261
+ ----
262
+ project
263
+ ├── columns: k:1(int!null)
264
+ ├── has-placeholder
265
+ ├── stats: [rows=30]
266
+ ├── key: (1)
267
+ └── placeholder-scan t@t_i_idx
268
+ ├── columns: k:1(int!null) i:2(int!null)
269
+ ├── has-placeholder
270
+ ├── stats: [rows=30, distinct(2)=1, null(2)=0]
271
+ ├── key: (1)
272
+ ├── fd: ()-->(2)
273
+ └── span
274
+ └── $1 [type=int]
275
+
276
+ # Similar case as above, but with opt to ensure the correct row counts are used
277
+ # for new memo groups.
278
+ opt
279
+ SELECT * FROM t WHERE i = $1
280
+ ----
281
+ project
282
+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
283
+ ├── has-placeholder
284
+ ├── stats: [rows=30, distinct(2)=1, null(2)=0]
285
+ ├── key: (1)
286
+ ├── fd: ()-->(2), (1)-->(3)
287
+ └── inner-join (lookup t)
288
+ ├── columns: k:1(int!null) i:2(int!null) s:3(string) "$1":6(int!null)
289
+ ├── key columns: [1] = [1]
290
+ ├── lookup columns are key
291
+ ├── has-placeholder
292
+ ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0]
293
+ ├── key: (1)
294
+ ├── fd: ()-->(2,6), (1)-->(3), (2)==(6), (6)==(2)
295
+ ├── inner-join (lookup t@t_i_idx)
296
+ │ ├── columns: k:1(int!null) i:2(int!null) "$1":6(int!null)
297
+ │ ├── flags: disallow merge join
298
+ │ ├── key columns: [6] = [2]
299
+ │ ├── parameterized columns: (6)
300
+ │ ├── has-placeholder
301
+ │ ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0]
302
+ │ ├── key: (1)
303
+ │ ├── fd: ()-->(2,6), (2)==(6), (6)==(2)
304
+ │ ├── values
305
+ │ │ ├── columns: "$1":6(int)
306
+ │ │ ├── cardinality: [1 - 1]
307
+ │ │ ├── has-placeholder
308
+ │ │ ├── stats: [rows=1, distinct(6)=1, null(6)=0]
309
+ │ │ ├── key: ()
310
+ │ │ ├── fd: ()-->(6)
311
+ │ │ └── ($1,) [type=tuple{int}]
312
+ │ └── filters (true)
313
+ └── filters (true)
314
+
315
+ # The row count of the filter is the max frequency of s's histogram.
316
+ norm
317
+ SELECT * FROM t WHERE $1 = s
318
+ ----
319
+ select
320
+ ├── columns: k:1(int!null) i:2(int) s:3(string!null)
321
+ ├── has-placeholder
322
+ ├── stats: [rows=500, distinct(3)=1, null(3)=0]
323
+ ├── key: (1)
324
+ ├── fd: ()-->(3), (1)-->(2)
325
+ ├── scan t
326
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
327
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(3)=20, null(3)=0]
328
+ │ │ histogram(3)= 0 0 100 300 100 500
329
+ │ │ <--- 'apple' ----- 'banana' ----- 'cherry'
330
+ │ ├── key: (1)
331
+ │ └── fd: (1)-->(2,3)
332
+ └── filters
333
+ └── s:3 = $1 [type=bool, outer=(3), constraints=(/3: (/NULL - ]), fd=()-->(3)]
334
+
335
+ # Similar case to the previous one, but with a join on a values expression to
336
+ # mimic a parameterized join of a generic query plan.
337
+ # TODO(mgartner): The row count of the inner-join should be 500, because that is
338
+ # the maximum frequency of s. It is currently 50 because v.s is not marked
339
+ # as a "parameterized column", which only happens during the
340
+ # GenerateParameterizedJoin exploration rule. I think we can address this by
341
+ # including parameterized columns in logical properties and propagating them
342
+ # upward.
343
+ norm
344
+ SELECT * FROM (VALUES ($1::STRING)) v(s) JOIN t ON t.s = v.s
345
+ ----
346
+ inner-join (hash)
347
+ ├── columns: s:1(string!null) k:2(int!null) i:3(int) s:4(string!null)
348
+ ├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one)
349
+ ├── has-placeholder
350
+ ├── stats: [rows=50, distinct(1)=1, null(1)=0, distinct(4)=1, null(4)=0]
351
+ ├── key: (2)
352
+ ├── fd: ()-->(1,4), (2)-->(3), (1)==(4), (4)==(1)
353
+ ├── values
354
+ │ ├── columns: column1:1(string)
355
+ │ ├── cardinality: [1 - 1]
356
+ │ ├── has-placeholder
357
+ │ ├── stats: [rows=1, distinct(1)=1, null(1)=0]
358
+ │ ├── key: ()
359
+ │ ├── fd: ()-->(1)
360
+ │ └── ($1,) [type=tuple{string}]
361
+ ├── scan t
362
+ │ ├── columns: k:2(int!null) i:3(int) s:4(string)
363
+ │ ├── stats: [rows=1000, distinct(4)=20, null(4)=0]
364
+ │ │ histogram(4)= 0 0 100 300 100 500
365
+ │ │ <--- 'apple' ----- 'banana' ----- 'cherry'
366
+ │ ├── key: (2)
367
+ │ └── fd: (2)-->(3,4)
368
+ └── filters
369
+ └── s:4 = column1:1 [type=bool, outer=(1,4), constraints=(/1: (/NULL - ]; /4: (/NULL - ]), fd=(1)==(4), (4)==(1)]
370
+
371
+ # The row count of the filter is based on the product of selectivities from the
372
+ # max frequencies of i's and s's histograms.
373
+ norm
374
+ SELECT * FROM t WHERE i = $1 AND s = $2
375
+ ----
376
+ select
377
+ ├── columns: k:1(int!null) i:2(int!null) s:3(string!null)
378
+ ├── has-placeholder
379
+ ├── stats: [rows=15, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=1, null(2,3)=0]
380
+ ├── key: (1)
381
+ ├── fd: ()-->(2,3)
382
+ ├── scan t
383
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
384
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30, distinct(3)=20, null(3)=0, distinct(2,3)=820, null(2,3)=0]
385
+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
386
+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
387
+ │ │ histogram(3)= 0 0 100 300 100 500
388
+ │ │ <--- 'apple' ----- 'banana' ----- 'cherry'
389
+ │ ├── key: (1)
390
+ │ └── fd: (1)-->(2,3)
391
+ └── filters
392
+ ├── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)]
393
+ └── s:3 = $2 [type=bool, outer=(3), constraints=(/3: (/NULL - ]), fd=()-->(3)]
394
+
395
+ norm
396
+ SELECT * FROM t WHERE i > $1
397
+ ----
398
+ select
399
+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
400
+ ├── has-placeholder
401
+ ├── stats: [rows=323.333, distinct(2)=41, null(2)=0]
402
+ ├── key: (1)
403
+ ├── fd: (1)-->(2,3)
404
+ ├── scan t
405
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
406
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30]
407
+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
408
+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
409
+ │ ├── key: (1)
410
+ │ └── fd: (1)-->(2,3)
411
+ └── filters
412
+ └── i:2 > $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ])]
413
+
414
+ norm
415
+ SELECT * FROM t WHERE i = $1 OR i = $2
416
+ ----
417
+ select
418
+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
419
+ ├── has-placeholder
420
+ ├── stats: [rows=323.333, distinct(2)=41, null(2)=0]
421
+ ├── key: (1)
422
+ ├── fd: (1)-->(2,3)
423
+ ├── scan t
424
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
425
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30]
426
+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
427
+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
428
+ │ ├── key: (1)
429
+ │ └── fd: (1)-->(2,3)
430
+ └── filters
431
+ └── (i:2 = $1) OR (i:2 = $2) [type=bool, outer=(2), constraints=(/2: (/NULL - ])]
432
+
433
+ norm
434
+ SELECT * FROM t WHERE i IN ($1, $2, $3)
435
+ ----
436
+ select
437
+ ├── columns: k:1(int!null) i:2(int) s:3(string)
438
+ ├── has-placeholder
439
+ ├── stats: [rows=333.333]
440
+ ├── key: (1)
441
+ ├── fd: (1)-->(2,3)
442
+ ├── scan t
443
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
444
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
445
+ │ ├── key: (1)
446
+ │ └── fd: (1)-->(2,3)
447
+ └── filters
448
+ └── i:2 IN ($1, $2, $3) [type=bool, outer=(2)]
449
+
450
+ norm
451
+ SELECT * FROM t WHERE i = $1 OR s = $2
452
+ ----
453
+ select
454
+ ├── columns: k:1(int!null) i:2(int) s:3(string)
455
+ ├── has-placeholder
456
+ ├── stats: [rows=333.333]
457
+ ├── key: (1)
458
+ ├── fd: (1)-->(2,3)
459
+ ├── scan t
460
+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
461
+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
462
+ │ ├── key: (1)
463
+ │ └── fd: (1)-->(2,3)
464
+ └── filters
465
+ └── (i:2 = $1) OR (s:3 = $2) [type=bool, outer=(2,3)]
0 commit comments