@@ -2,10 +2,15 @@ exec-ddl
22CREATE TABLE t (
33 k INT PRIMARY KEY,
44 i INT,
5- s STRING
5+ s STRING,
6+ INDEX (i)
67)
78----
89
10+ # ------------------------
11+ # Tests without Histograms
12+ # ------------------------
13+
914exec-ddl
1015ALTER TABLE t INJECT STATISTICS '[
1116 {
@@ -166,3 +171,295 @@ select
166171 │ └── fd: (1)-->(2,3)
167172 └── filters
168173 └── (i:2 = $1) OR (s:3 = $2) [type=bool, outer=(2,3)]
174+
175+ # ---------------------
176+ # Tests with Histograms
177+ # ---------------------
178+
179+ exec-ddl
180+ ALTER TABLE t INJECT STATISTICS '[
181+ {
182+ "columns": ["k"],
183+ "created_at": "2018-01-01 1:00:00.00000+00:00",
184+ "row_count": 1000,
185+ "distinct_count": 1000
186+ },
187+ {
188+ "columns": ["i"],
189+ "created_at": "2018-01-01 1:00:00.00000+00:00",
190+ "row_count": 1000,
191+ "distinct_count": 41,
192+ "null_count": 30,
193+ "avg_size": 2,
194+ "histo_col_type": "int",
195+ "histo_buckets": [
196+ {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "0"},
197+ {"num_eq": 10, "num_range": 90, "distinct_range": 9, "upper_bound": "100"},
198+ {"num_eq": 10, "num_range": 180, "distinct_range": 9, "upper_bound": "200"},
199+ {"num_eq": 20, "num_range": 270, "distinct_range": 9, "upper_bound": "300"},
200+ {"num_eq": 30, "num_range": 360, "distinct_range": 9, "upper_bound": "400"}
201+ ]
202+ },
203+ {
204+ "columns": ["s"],
205+ "created_at": "2018-01-01 1:00:00.00000+00:00",
206+ "row_count": 1000,
207+ "distinct_count": 20,
208+ "avg_size": 3,
209+ "histo_col_type": "string",
210+ "histo_buckets": [
211+ {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "apple"},
212+ {"num_eq": 300, "num_range": 100, "distinct_range": 9, "upper_bound": "banana"},
213+ {"num_eq": 500, "num_range": 100, "distinct_range": 9, "upper_bound": "cherry"}
214+ ]
215+ }
216+ ]'
217+ ----
218+
219+ norm
220+ SELECT * FROM t WHERE k = $1
221+ ----
222+ select
223+ ├── columns: k:1(int!null) i:2(int) s:3(string)
224+ ├── cardinality: [0 - 1]
225+ ├── has-placeholder
226+ ├── stats: [rows=1, distinct(1)=1, null(1)=0]
227+ ├── key: ()
228+ ├── fd: ()-->(1-3)
229+ ├── scan t
230+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
231+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
232+ │ ├── key: (1)
233+ │ └── fd: (1)-->(2,3)
234+ └── filters
235+ └── k:1 = $1 [type=bool, outer=(1), constraints=(/1: (/NULL - ]), fd=()-->(1)]
236+
237+ # The row count of the filter is the max frequency of i's histogram.
238+ norm
239+ SELECT * FROM t WHERE i = $1
240+ ----
241+ select
242+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
243+ ├── has-placeholder
244+ ├── stats: [rows=30, distinct(2)=1, null(2)=0]
245+ ├── key: (1)
246+ ├── fd: ()-->(2), (1)-->(3)
247+ ├── scan t
248+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
249+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30]
250+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
251+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
252+ │ ├── key: (1)
253+ │ └── fd: (1)-->(2,3)
254+ └── filters
255+ └── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)]
256+
257+ # Similar case as above, but with opt to ensure the correct row counts are used
258+ # for new memo groups.
259+ opt
260+ SELECT k FROM t WHERE i = $1
261+ ----
262+ project
263+ ├── columns: k:1(int!null)
264+ ├── has-placeholder
265+ ├── stats: [rows=30]
266+ ├── key: (1)
267+ └── placeholder-scan t@t_i_idx
268+ ├── columns: k:1(int!null) i:2(int!null)
269+ ├── has-placeholder
270+ ├── stats: [rows=30, distinct(2)=1, null(2)=0]
271+ ├── key: (1)
272+ ├── fd: ()-->(2)
273+ └── span
274+ └── $1 [type=int]
275+
276+ # Similar to the previous opt case, but selects all columns so a lookup join
277+ # into the primary index is needed; row counts must stay correct there too.
278+ opt
279+ SELECT * FROM t WHERE i = $1
280+ ----
281+ project
282+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
283+ ├── has-placeholder
284+ ├── stats: [rows=30, distinct(2)=1, null(2)=0]
285+ ├── key: (1)
286+ ├── fd: ()-->(2), (1)-->(3)
287+ └── inner-join (lookup t)
288+ ├── columns: k:1(int!null) i:2(int!null) s:3(string) "$1":6(int!null)
289+ ├── key columns: [1] = [1]
290+ ├── lookup columns are key
291+ ├── has-placeholder
292+ ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0]
293+ ├── key: (1)
294+ ├── fd: ()-->(2,6), (1)-->(3), (2)==(6), (6)==(2)
295+ ├── inner-join (lookup t@t_i_idx)
296+ │ ├── columns: k:1(int!null) i:2(int!null) "$1":6(int!null)
297+ │ ├── flags: disallow merge join
298+ │ ├── key columns: [6] = [2]
299+ │ ├── parameterized columns: (6)
300+ │ ├── has-placeholder
301+ │ ├── stats: [rows=30, distinct(2)=1, null(2)=0, distinct(6)=1, null(6)=0]
302+ │ ├── key: (1)
303+ │ ├── fd: ()-->(2,6), (2)==(6), (6)==(2)
304+ │ ├── values
305+ │ │ ├── columns: "$1":6(int)
306+ │ │ ├── cardinality: [1 - 1]
307+ │ │ ├── has-placeholder
308+ │ │ ├── stats: [rows=1, distinct(6)=1, null(6)=0]
309+ │ │ ├── key: ()
310+ │ │ ├── fd: ()-->(6)
311+ │ │ └── ($1,) [type=tuple{int}]
312+ │ └── filters (true)
313+ └── filters (true)
314+
315+ # The row count of the filter is the max frequency of s's histogram.
316+ norm
317+ SELECT * FROM t WHERE $1 = s
318+ ----
319+ select
320+ ├── columns: k:1(int!null) i:2(int) s:3(string!null)
321+ ├── has-placeholder
322+ ├── stats: [rows=500, distinct(3)=1, null(3)=0]
323+ ├── key: (1)
324+ ├── fd: ()-->(3), (1)-->(2)
325+ ├── scan t
326+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
327+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(3)=20, null(3)=0]
328+ │ │ histogram(3)= 0 0 100 300 100 500
329+ │ │ <--- 'apple' ----- 'banana' ----- 'cherry'
330+ │ ├── key: (1)
331+ │ └── fd: (1)-->(2,3)
332+ └── filters
333+ └── s:3 = $1 [type=bool, outer=(3), constraints=(/3: (/NULL - ]), fd=()-->(3)]
334+
335+ # Similar case to the previous one, but with a join on a values expression to
336+ # mimic a parameterized join of a generic query plan.
337+ # TODO(mgartner): The row count of the inner-join should be 500, because that is
338+ # the maximum frequency of s. It is currently 50 because v.s is not marked
339+ # as a "parameterized column", which only happens during the
340+ # GenerateParameterizedJoin exploration rule. I think we can address this by
341+ # including parameterized columns in logical properties and propagating them
342+ # upward.
343+ norm
344+ SELECT * FROM (VALUES ($1::STRING)) v(s) JOIN t ON t.s = v.s
345+ ----
346+ inner-join (hash)
347+ ├── columns: s:1(string!null) k:2(int!null) i:3(int) s:4(string!null)
348+ ├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one)
349+ ├── has-placeholder
350+ ├── stats: [rows=50, distinct(1)=1, null(1)=0, distinct(4)=1, null(4)=0]
351+ ├── key: (2)
352+ ├── fd: ()-->(1,4), (2)-->(3), (1)==(4), (4)==(1)
353+ ├── values
354+ │ ├── columns: column1:1(string)
355+ │ ├── cardinality: [1 - 1]
356+ │ ├── has-placeholder
357+ │ ├── stats: [rows=1, distinct(1)=1, null(1)=0]
358+ │ ├── key: ()
359+ │ ├── fd: ()-->(1)
360+ │ └── ($1,) [type=tuple{string}]
361+ ├── scan t
362+ │ ├── columns: k:2(int!null) i:3(int) s:4(string)
363+ │ ├── stats: [rows=1000, distinct(4)=20, null(4)=0]
364+ │ │ histogram(4)= 0 0 100 300 100 500
365+ │ │ <--- 'apple' ----- 'banana' ----- 'cherry'
366+ │ ├── key: (2)
367+ │ └── fd: (2)-->(3,4)
368+ └── filters
369+ └── s:4 = column1:1 [type=bool, outer=(1,4), constraints=(/1: (/NULL - ]; /4: (/NULL - ]), fd=(1)==(4), (4)==(1)]
370+
371+ # The row count of the filter is based on the product of selectivities from the
372+ # max frequencies of i's and s's histograms.
373+ norm
374+ SELECT * FROM t WHERE i = $1 AND s = $2
375+ ----
376+ select
377+ ├── columns: k:1(int!null) i:2(int!null) s:3(string!null)
378+ ├── has-placeholder
379+ ├── stats: [rows=15, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=1, null(2,3)=0]
380+ ├── key: (1)
381+ ├── fd: ()-->(2,3)
382+ ├── scan t
383+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
384+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30, distinct(3)=20, null(3)=0, distinct(2,3)=820, null(2,3)=0]
385+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
386+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
387+ │ │ histogram(3)= 0 0 100 300 100 500
388+ │ │ <--- 'apple' ----- 'banana' ----- 'cherry'
389+ │ ├── key: (1)
390+ │ └── fd: (1)-->(2,3)
391+ └── filters
392+ ├── i:2 = $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ]), fd=()-->(2)]
393+ └── s:3 = $2 [type=bool, outer=(3), constraints=(/3: (/NULL - ]), fd=()-->(3)]
394+
395+ norm
396+ SELECT * FROM t WHERE i > $1
397+ ----
398+ select
399+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
400+ ├── has-placeholder
401+ ├── stats: [rows=323.333, distinct(2)=41, null(2)=0]
402+ ├── key: (1)
403+ ├── fd: (1)-->(2,3)
404+ ├── scan t
405+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
406+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30]
407+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
408+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
409+ │ ├── key: (1)
410+ │ └── fd: (1)-->(2,3)
411+ └── filters
412+ └── i:2 > $1 [type=bool, outer=(2), constraints=(/2: (/NULL - ])]
413+
414+ norm
415+ SELECT * FROM t WHERE i = $1 OR i = $2
416+ ----
417+ select
418+ ├── columns: k:1(int!null) i:2(int!null) s:3(string)
419+ ├── has-placeholder
420+ ├── stats: [rows=323.333, distinct(2)=41, null(2)=0]
421+ ├── key: (1)
422+ ├── fd: (1)-->(2,3)
423+ ├── scan t
424+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
425+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=41, null(2)=30]
426+ │ │ histogram(2)= 0 30 0 0 90 10 180 10 270 20 360 30
427+ │ │ <--- NULL --- 0 ---- 100 ----- 200 ----- 300 ----- 400
428+ │ ├── key: (1)
429+ │ └── fd: (1)-->(2,3)
430+ └── filters
431+ └── (i:2 = $1) OR (i:2 = $2) [type=bool, outer=(2), constraints=(/2: (/NULL - ])]
432+
433+ norm
434+ SELECT * FROM t WHERE i IN ($1, $2, $3)
435+ ----
436+ select
437+ ├── columns: k:1(int!null) i:2(int) s:3(string)
438+ ├── has-placeholder
439+ ├── stats: [rows=333.333]
440+ ├── key: (1)
441+ ├── fd: (1)-->(2,3)
442+ ├── scan t
443+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
444+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
445+ │ ├── key: (1)
446+ │ └── fd: (1)-->(2,3)
447+ └── filters
448+ └── i:2 IN ($1, $2, $3) [type=bool, outer=(2)]
449+
450+ norm
451+ SELECT * FROM t WHERE i = $1 OR s = $2
452+ ----
453+ select
454+ ├── columns: k:1(int!null) i:2(int) s:3(string)
455+ ├── has-placeholder
456+ ├── stats: [rows=333.333]
457+ ├── key: (1)
458+ ├── fd: (1)-->(2,3)
459+ ├── scan t
460+ │ ├── columns: k:1(int!null) i:2(int) s:3(string)
461+ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
462+ │ ├── key: (1)
463+ │ └── fd: (1)-->(2,3)
464+ └── filters
465+ └── (i:2 = $1) OR (s:3 = $2) [type=bool, outer=(2,3)]
0 commit comments