diff --git a/contrib/pax_storage/expected/runtime_filter.out b/contrib/pax_storage/expected/runtime_filter.out new file mode 100644 index 00000000000..5ef275b103a --- /dev/null +++ b/contrib/pax_storage/expected/runtime_filter.out @@ -0,0 +1,365 @@ +SET optimizer TO on; +-- Test Suit 1: runtime filter main case +DROP TABLE IF EXISTS fact_rf, dim_rf; +NOTICE: table "fact_rf" does not exist, skipping +NOTICE: table "dim_rf" does not exist, skipping +CREATE TABLE fact_rf (fid int, did int, val int) using pax WITH(minmax_columns='fid,did,val'); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'fid' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE TABLE dim_rf (did int, proj_id int, filter_val int) using pax WITH(minmax_columns='did,proj_id,filter_val'); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'did' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +-- Generating data, fact_rd.did and dim_rf.did is 80% matched +INSERT INTO fact_rf SELECT i, i % 8000 + 1, i FROM generate_series(1, 100000) s(i); +INSERT INTO dim_rf SELECT i, i % 10, i FROM generate_series(1, 10000) s(i); +ANALYZE fact_rf, dim_rf; +SET gp_enable_runtime_filter_pushdown TO off; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1486 width=1) (actual time=4.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..4.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 8.388 ms + (slice0) Executor memory: 63K bytes. + (slice1) Executor memory: 4269K bytes avg x 3x(0) workers, 4269K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 9.602 ms +(20 rows) + +SET gp_enable_runtime_filter_pushdown TO on; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1486 width=1) (actual time=0.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) RF: fact_rf attrno: 2, range[1, 1000], n_distinct: -0.22 + (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + Extra Text: (seg0) ScanKey[did] >= 1, ScanKey[did] <= 1000 + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 6.353 ms + (slice0) Executor memory: 63K bytes. + (slice1) Executor memory: 4400K bytes avg x 3x(0) workers, 4400K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 7.437 ms +(22 rows) + +-- Test bad filter rate +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 7; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..870.16 rows=1 width=8) (actual time=12.001..12.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..870.16 rows=1 width=8) (actual time=12.001..12.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..870.16 rows=1 width=8) (actual time=12.001..12.001 rows=1 loops=1) + -> Hash Join (cost=0.00..870.16 rows=29211 width=1) (actual time=4.000..8.000 rows=23492 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg2) Hash chain length 1.0 avg, 2 max, using 2285 of 524288 buckets. + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..432.51 rows=33334 width=4) (actual time=0.000..4.000 rows=33501 loops=1) + Hash Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + -> Hash (cost=431.23..431.23 rows=2334 width=4) (actual time=0.000..0.000 rows=2368 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4180kB + -> Seq Scan on dim_rf (cost=0.00..431.23 rows=2334 width=4) (actual time=0.000..0.000 rows=2368 loops=1) + Filter: (proj_id < 7) + Rows Removed by Filter: 1017 + Planning Time: 8.409 ms + (slice0) Executor memory: 62K bytes. + (slice1) Executor memory: 4252K bytes avg x 3x(0) workers, 4252K bytes max (seg0). Work_mem: 4180K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 12.455 ms +(21 rows) + +-- Test outer join +-- LeftJoin (eliminated and applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1486 width=1) (actual time=0.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) RF: fact_rf attrno: 2, range[1, 1000], n_distinct: -0.22 + (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + Extra Text: (seg0) ScanKey[did] >= 1, ScanKey[did] <= 1000 + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 7.237 ms + (slice0) Executor memory: 63K bytes. + (slice1) Executor memory: 4399K bytes avg x 3x(0) workers, 4399K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 7.955 ms +(22 rows) + +-- LeftJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..876.97 rows=1 width=8) (actual time=12.001..12.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..876.97 rows=1 width=8) (actual time=12.001..12.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..876.97 rows=1 width=8) (actual time=12.001..12.001 rows=1 loops=1) + -> Result (cost=0.00..876.97 rows=32949 width=1) (actual time=0.000..12.001 rows=1040 loops=1) + Filter: ((dim_rf.proj_id IS NULL) OR ((dim_rf.proj_id < 2) AND (dim_rf.filter_val <= 1000))) + -> Hash Left Join (cost=0.00..872.63 rows=65944 width=8) (actual time=0.000..8.000 rows=33501 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) Hash chain length 1.0 avg, 2 max, using 3377 of 524288 buckets. + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..432.51 rows=33334 width=4) (actual time=0.000..4.000 rows=33501 loops=1) + Hash Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + -> Hash (cost=431.08..431.08 rows=3334 width=12) (actual time=0.000..0.000 rows=3385 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4242kB + -> Seq Scan on dim_rf (cost=0.00..431.08 rows=3334 width=12) (actual time=0.000..0.000 rows=3385 loops=1) + Planning Time: 7.118 ms + (slice0) Executor memory: 66K bytes. + (slice1) Executor memory: 4318K bytes avg x 3x(0) workers, 4318K bytes max (seg0). Work_mem: 4242K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 14.055 ms +(21 rows) + +-- RightJoin (applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.72 rows=1 width=8) (actual time=44.002..44.002 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.72 rows=1 width=8) (actual time=44.002..44.002 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.72 rows=1 width=8) (actual time=44.002..44.002 rows=1 loops=1) + -> Hash Right Join (cost=0.00..869.72 rows=1546 width=1) (actual time=0.000..44.002 rows=1040 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 80 of 524288 buckets. + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..432.51 rows=33334 width=4) (actual time=0.000..8.000 rows=33501 loops=1) + Hash Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + -> Hash (cost=431.31..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4099kB + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 6.693 ms + (slice0) Executor memory: 63K bytes. + (slice1) Executor memory: 4238K bytes avg x 3x(0) workers, 4238K bytes max (seg0). Work_mem: 4099K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 43.338 ms +(21 rows) + +-- SemiJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Semi Join (cost=0.00..869.37 rows=1486 width=1) (actual time=4.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) RF: fact_rf attrno: 2, range[1, 1000], n_distinct: -0.22 + (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + Extra Text: (seg0) ScanKey[did] >= 1, ScanKey[did] <= 1000 + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=4.000..4.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=4.000..4.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 20.060 ms + (slice0) Executor memory: 63K bytes. + (slice1) Executor memory: 4399K bytes avg x 3x(0) workers, 4399K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 8.057 ms +(22 rows) + +-- SemiJoin -> InnerJoin and deduplicate +EXPLAIN ANALYZE SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Finalize Aggregate (cost=0.00..868.28 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..868.28 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..868.28 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..868.28 rows=119 width=1) (actual time=8.000..8.000 rows=80 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 80 of 131072 buckets. + -> HashAggregate (cost=0.00..436.48 rows=2663 width=4) (actual time=8.000..8.000 rows=2679 loops=1) + Group Key: fact_rf.did + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..436.16 rows=2663 width=4) (actual time=4.000..8.000 rows=7979 loops=1) + Hash Key: fact_rf.did + -> Streaming HashAggregate (cost=0.00..436.12 rows=2663 width=4) (actual time=4.000..4.000 rows=7943 loops=1) + Group Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + -> Hash (cost=431.31..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Buckets: 131072 Batches: 1 Memory Usage: 1027kB + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 9.647 ms + (slice0) Executor memory: 280K bytes. + (slice1) Executor memory: 1307K bytes avg x 3x(0) workers, 1308K bytes max (seg1). Work_mem: 1027K bytes max. + (slice2) Executor memory: 825K bytes avg x 3x(0) workers, 832K bytes max (seg2). Work_mem: 913K bytes max. + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 8.130 ms +(25 rows) + +-- Test correctness +SELECT * FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND dim_rf.filter_val = 1 + ORDER BY fid; + fid | did | val | did | proj_id | filter_val +-------+-----+-------+-----+---------+------------ + 8000 | 1 | 8000 | 1 | 1 | 1 + 16000 | 1 | 16000 | 1 | 1 | 1 + 24000 | 1 | 24000 | 1 | 1 | 1 + 32000 | 1 | 32000 | 1 | 1 | 1 + 40000 | 1 | 40000 | 1 | 1 | 1 + 48000 | 1 | 48000 | 1 | 1 | 1 + 56000 | 1 | 56000 | 1 | 1 | 1 + 64000 | 1 | 64000 | 1 | 1 | 1 + 72000 | 1 | 72000 | 1 | 1 | 1 + 80000 | 1 | 80000 | 1 | 1 | 1 + 88000 | 1 | 88000 | 1 | 1 | 1 + 96000 | 1 | 96000 | 1 | 1 | 1 +(12 rows) + +SELECT * FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE dim_rf.filter_val = 1 + ORDER BY fid; + fid | did | val | did | proj_id | filter_val +-------+-----+-------+-----+---------+------------ + 8000 | 1 | 8000 | 1 | 1 | 1 + 16000 | 1 | 16000 | 1 | 1 | 1 + 24000 | 1 | 24000 | 1 | 1 | 1 + 32000 | 1 | 32000 | 1 | 1 | 1 + 40000 | 1 | 40000 | 1 | 1 | 1 + 48000 | 1 | 48000 | 1 | 1 | 1 + 56000 | 1 | 56000 | 1 | 1 | 1 + 64000 | 1 | 64000 | 1 | 1 | 1 + 72000 | 1 | 72000 | 1 | 1 | 1 + 80000 | 1 | 80000 | 1 | 1 | 1 + 88000 | 1 | 88000 | 1 | 1 | 1 + 96000 | 1 | 96000 | 1 | 1 | 1 +(12 rows) + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + count +------- + 200 +(1 row) + +-- Test contain null values +INSERT INTO dim_rf VALUES (NULL,1, 1); +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1486 width=1) (actual time=0.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=201 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=81 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 13.072 ms + (slice0) Executor memory: 63K bytes. + (slice1) Executor memory: 4398K bytes avg x 3x(0) workers, 4398K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 119K bytes avg x 3x(0) workers, 119K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 8.964 ms +(20 rows) + +SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +-- Clean up: reset guc +SET gp_enable_runtime_filter_pushdown TO off; +RESET optimizer; diff --git a/contrib/pax_storage/pax_schedule b/contrib/pax_storage/pax_schedule index f9818f8e0b7..d0d633ccab3 100644 --- a/contrib/pax_storage/pax_schedule +++ b/contrib/pax_storage/pax_schedule @@ -20,5 +20,6 @@ test: dictionary_encoding test: cluster test: db_size_functions +test: runtime_filter test: teardown diff --git a/contrib/pax_storage/sql/runtime_filter.sql b/contrib/pax_storage/sql/runtime_filter.sql new file mode 100644 index 00000000000..f4ca79b12e1 --- /dev/null +++ b/contrib/pax_storage/sql/runtime_filter.sql @@ -0,0 +1,86 @@ +SET optimizer TO on; + +-- Test Suit 1: runtime filter main case +DROP TABLE IF EXISTS fact_rf, dim_rf; +CREATE TABLE fact_rf (fid int, did int, val int) using pax WITH(minmax_columns='fid,did,val'); +CREATE TABLE dim_rf (did int, proj_id int, filter_val int) using pax WITH(minmax_columns='did,proj_id,filter_val'); + +-- Generating data, fact_rd.did and dim_rf.did is 80% matched +INSERT INTO fact_rf SELECT i, i % 8000 + 1, i FROM generate_series(1, 100000) s(i); +INSERT INTO dim_rf SELECT i, i % 10, i FROM generate_series(1, 10000) s(i); +ANALYZE fact_rf, dim_rf; + +SET gp_enable_runtime_filter_pushdown TO off; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + +SET gp_enable_runtime_filter_pushdown TO on; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + +-- Test bad filter rate +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 7; + +-- Test outer join +-- LeftJoin (eliminated and applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +-- LeftJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + +-- RightJoin (applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +-- SemiJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + +-- SemiJoin -> InnerJoin and deduplicate +EXPLAIN ANALYZE SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + +-- Test correctness +SELECT * FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND dim_rf.filter_val = 1 + ORDER BY fid; + +SELECT * FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE dim_rf.filter_val = 1 + ORDER BY fid; + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + +SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + +SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + +-- Test contain null values +INSERT INTO dim_rf VALUES (NULL,1, 1); +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; +SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + +-- Clean up: reset guc +SET gp_enable_runtime_filter_pushdown TO off; +RESET optimizer; diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc index da07cddd5d7..c6f692c79ee 100644 --- a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc @@ -443,6 +443,7 @@ const TupleTableSlotOps *PaxAccessMethod::SlotCallbacks( uint32 PaxAccessMethod::ScanFlags(Relation relation) { uint32 flags = 0; + std::vector minmax_columns; #ifdef VEC_BUILD flags |= SCAN_SUPPORT_VECTORIZATION | SCAN_SUPPORT_COLUMN_ORIENTED_SCAN; #else @@ -452,6 +453,10 @@ uint32 PaxAccessMethod::ScanFlags(Relation relation) { #if defined(USE_MANIFEST_API) && !defined(USE_PAX_CATALOG) flags |= SCAN_FORCE_BIG_WRITE_LOCK; #endif + minmax_columns = cbdb::GetMinMaxColumnIndexes(relation); + if (!minmax_columns.empty()) { + flags |= SCAN_SUPPORT_RUNTIME_FILTER; + } return flags; } diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 62d3c2da790..c2a1f40e83f 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -2584,6 +2584,7 @@ ExecHashTableExplainEnd(PlanState *planstate, struct StringInfoData *buf) Instrumentation *jinstrument = hjstate->js.ps.instrument; int total_buckets; int i; + HashState *hashState = (HashState *) innerPlanState(hjstate); if (!hashtable || !hashtable->stats || @@ -2598,11 +2599,13 @@ ExecHashTableExplainEnd(PlanState *planstate, struct StringInfoData *buf) if (!hashtable->eagerlyReleased) { - HashState *hashState = (HashState *) innerPlanState(hjstate); - /* Report on batch in progress, in case the join is being ended early. */ ExecHashTableExplainBatchEnd(hashState, hashtable); } + if (gp_enable_runtime_filter_pushdown && hashState->filters) + { + ExecRFExplainEnd(hashState, buf); + } /* Report actual work_mem high water mark. */ jinstrument->workmemused = Max(jinstrument->workmemused, stats->workmem_max); @@ -4161,7 +4164,7 @@ PushdownRuntimeFilter(HashState *node) scankeys = NIL; attr_filter = lfirst(lc); - if (attr_filter->empty || + if (attr_filter->empty || attr_filter->hasnulls || (!IsA(attr_filter->target, SeqScanState) && !IsA(attr_filter->target, DynamicSeqScanState))) continue; @@ -4172,41 +4175,72 @@ PushdownRuntimeFilter(HashState *node) sk->sk_attno = attr_filter->lattno; sk->sk_subtype = INT8OID; sk->sk_argument = PointerGetDatum(attr_filter->blm_filter); + sk->sk_collation = attr_filter->collation; scankeys = lappend(scankeys, sk); + if (attr_filter->n_distinct > 0) + { + SeqScanState *sss = castNode(SeqScanState, attr_filter->target); + int64 range = attr_filter->max - attr_filter->min + 1; + if ((range / attr_filter->n_distinct) > gp_runtime_filter_selectivity_threshold) + { + /* push previous scankeys */ + sss->filters = list_concat(sss->filters, scankeys); + continue; + } + } /* range filter */ sk = (ScanKey)palloc0(sizeof(ScanKeyData)); sk->sk_flags = 0; sk->sk_attno = attr_filter->lattno; sk->sk_strategy = BTGreaterEqualStrategyNumber; - sk->sk_subtype = INT8OID; + sk->sk_subtype = attr_filter->vartype; sk->sk_argument = attr_filter->min; + sk->sk_collation = attr_filter->collation; scankeys = lappend(scankeys, sk); sk = (ScanKey)palloc0(sizeof(ScanKeyData)); sk->sk_flags = 0; sk->sk_attno = attr_filter->lattno; sk->sk_strategy = BTLessEqualStrategyNumber; - sk->sk_subtype = INT8OID; + sk->sk_subtype = attr_filter->vartype; sk->sk_argument = attr_filter->max; + sk->sk_collation = attr_filter->collation; scankeys = lappend(scankeys, sk); /* append new runtime filters to target node */ if (IsA(attr_filter->target, SeqScanState)) { SeqScanState *sss = castNode(SeqScanState, attr_filter->target); - sss->filters = list_concat(sss->filters, scankeys); + if (sss->ss.ss_currentScanDesc != NULL) + { + /* if seqscan is started, we can't pushdown the runtime filter */ + list_free_deep(scankeys); + } + else + { + sss->filters = list_concat(sss->filters, scankeys); + } } else if (IsA(attr_filter->target, DynamicSeqScanState)) { DynamicSeqScanState *dsss = castNode(DynamicSeqScanState, attr_filter->target); - dsss->filters = list_concat(dsss->filters, scankeys); + if (dsss->ss.ss_currentScanDesc != NULL) + { + /* if dynamic seqscan is started, we can't pushdown the runtime filter */ + list_free_deep(scankeys); + } + else + { + dsss->filters = list_concat(dsss->filters, scankeys); + } } else { /* never reach here */ pg_unreachable(); } + } } @@ -4221,10 +4255,15 @@ AddTupleValuesIntoRF(HashState *node, TupleTableSlot *slot) foreach (lc, node->filters) { attr_filter = (AttrFilter *) lfirst(lc); + if (attr_filter->hasnulls) + continue; val = slot_getattr(slot, attr_filter->rattno, &isnull); if (isnull) + { + attr_filter->hasnulls = true; continue; + } attr_filter->empty = false; @@ -4274,6 +4313,7 @@ ResetRuntimeFilter(HashState *node) { attr_filter = lfirst(lc); attr_filter->empty = true; + attr_filter->hasnulls = false; if (IsA(attr_filter->target, SeqScanState)) { @@ -4304,7 +4344,7 @@ ResetRuntimeFilter(HashState *node) attr_filter->blm_filter = bloom_create_aggresive(node->ps.plan->plan_rows, work_mem, - random()); + gp_session_id); StaticAssertDecl(sizeof(LONG_MAX) == sizeof(Datum), "sizeof(LONG_MAX) should be equal to sizeof(Datum)"); StaticAssertDecl(sizeof(LONG_MIN) == sizeof(Datum), "sizeof(LONG_MIN) should be equal to sizeof(Datum)"); diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 9ec70f16e31..ca56a095d2a 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -110,6 +110,8 @@ #include "access/htup_details.h" #include "access/parallel.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_namespace.h" #include "executor/executor.h" #include "executor/hashjoin.h" #include "executor/instrument.h" /* Instrumentation */ @@ -118,9 +120,12 @@ #include "executor/nodeRuntimeFilter.h" #include "miscadmin.h" #include "pgstat.h" +#include "utils/datum.h" #include "utils/guc.h" #include "utils/fmgroids.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/rel.h" #include "utils/sharedtuplestore.h" #include "cdb/cdbvars.h" @@ -168,10 +173,10 @@ static bool IsEqualOp(Expr *expr); static bool CheckEqualArgs(Expr *expr, AttrNumber *lattno, AttrNumber *rattno); static bool CheckTargetNode(PlanState *node, AttrNumber attno, - AttrNumber *lattno); + AttrNumber *lattno, Oid *collation, Oid *var_type); static List *FindTargetNodes(HashJoinState *hjstate, AttrNumber attno, - AttrNumber *lattno); + AttrNumber *lattno, Oid *collation, Oid *var_type); static AttrFilter *CreateAttrFilter(PlanState *target, AttrNumber lattno, AttrNumber rattno, @@ -2192,6 +2197,8 @@ CreateRuntimeFilter(HashJoinState* hjstate) AttrFilter *attr_filter; ListCell *lc; List *targets; + Oid var_type = InvalidOid; + Oid collation = InvalidOid; /* * A build-side Bloom filter tells us if a row is definitely not in the build @@ -2232,7 +2239,7 @@ CreateRuntimeFilter(HashJoinState* hjstate) if (lattno < 1 || rattno < 1) continue; - targets = FindTargetNodes(hjstate, lattno, &lattno); + targets = FindTargetNodes(hjstate, lattno, &lattno, &collation, &var_type); if (lattno == -1 || targets == NULL) continue; @@ -2243,6 +2250,8 @@ CreateRuntimeFilter(HashJoinState* hjstate) attr_filter = CreateAttrFilter(target, lattno, rattno, hstate->ps.plan->plan_rows); + attr_filter->vartype = var_type; + attr_filter->collation = collation; if (attr_filter->blm_filter) hstate->filters = lappend(hstate->filters, attr_filter); else @@ -2329,7 +2338,7 @@ CheckEqualArgs(Expr *expr, AttrNumber *lattno, AttrNumber *rattno) } static bool -CheckTargetNode(PlanState *node, AttrNumber attno, AttrNumber *lattno) +CheckTargetNode(PlanState *node, AttrNumber attno, AttrNumber *lattno, Oid *collation, Oid *var_type) { Var *var; TargetEntry *te; @@ -2360,6 +2369,8 @@ CheckTargetNode(PlanState *node, AttrNumber attno, AttrNumber *lattno) return false; *lattno = var->varattno; + *collation = var->varcollid; + *var_type = var->vartype; return true; } @@ -2372,7 +2383,7 @@ CheckTargetNode(PlanState *node, AttrNumber attno, AttrNumber *lattno) * SeqScan <- target */ static List * -FindTargetNodes(HashJoinState *hjstate, AttrNumber attno, AttrNumber *lattno) +FindTargetNodes(HashJoinState *hjstate, AttrNumber attno, AttrNumber *lattno, Oid *collation, Oid *var_type) { Var *var; PlanState *child, *parent; @@ -2396,7 +2407,7 @@ FindTargetNodes(HashJoinState *hjstate, AttrNumber attno, AttrNumber *lattno) * [result] * seqscan | dynamicseqscan */ - if (!CheckTargetNode(child, attno, lattno)) + if (!CheckTargetNode(child, attno, lattno, collation, var_type)) return NULL; targetNodes = lappend(targetNodes, child); @@ -2414,7 +2425,7 @@ FindTargetNodes(HashJoinState *hjstate, AttrNumber attno, AttrNumber *lattno) for (int i = 0; i < as->as_nplans; i++) { child = as->appendplans[i]; - if (!CheckTargetNode(child, attno, lattno)) + if (!CheckTargetNode(child, attno, lattno, collation, var_type)) return NULL; targetNodes = lappend(targetNodes, child); @@ -2462,12 +2473,25 @@ CreateAttrFilter(PlanState *target, AttrNumber lattno, AttrNumber rattno, { AttrFilter *attr_filter = palloc0(sizeof(AttrFilter)); attr_filter->empty = true; + attr_filter->hasnulls = false; attr_filter->target = target; attr_filter->lattno = lattno; attr_filter->rattno = rattno; + attr_filter->n_distinct = 0.0; - attr_filter->blm_filter = bloom_create_aggresive(plan_rows, work_mem, random()); + attr_filter->blm_filter = bloom_create_aggresive(plan_rows, work_mem, gp_session_id); + + if (target && IsA(target, SeqScanState)) + { + HeapTuple statstuple; + SeqScanState *scan = (SeqScanState *)target; + statstuple = get_att_stats(RelationGetRelid(scan->ss.ss_currentRelation), lattno); + if (HeapTupleIsValid(statstuple)) + { + attr_filter->n_distinct = ((Form_pg_statistic) GETSTRUCT(statstuple))->stadistinct; + } + } StaticAssertDecl(sizeof(LONG_MAX) == sizeof(Datum), "sizeof(LONG_MAX) should be equal to sizeof(Datum)"); StaticAssertDecl(sizeof(LONG_MIN) == sizeof(Datum), "sizeof(LONG_MIN) should be equal to sizeof(Datum)"); diff --git a/src/backend/executor/nodeRuntimeFilter.c b/src/backend/executor/nodeRuntimeFilter.c index 14ca2b6cbfe..57854668535 100644 --- a/src/backend/executor/nodeRuntimeFilter.c +++ b/src/backend/executor/nodeRuntimeFilter.c @@ -28,7 +28,10 @@ #include "postgres.h" +#include "access/tableam.h" #include "catalog/pg_type.h" +#include "catalog/pg_statistic.h" +#include "catalog/pg_namespace.h" #include "executor/executor.h" #include "executor/hashjoin.h" #include "executor/nodeRuntimeFilter.h" @@ -36,6 +39,7 @@ #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "nodes/pg_list.h" +#include "utils/datum.h" #include "utils/lsyscache.h" #include "cdb/cdbvars.h" @@ -325,4 +329,31 @@ RFFillTupleValues(RuntimeFilterState *rfstate, List *values) rfstate->value_buf[idx] = *dp; idx++; } -} \ No newline at end of file +} + +void +ExecRFExplainEnd(HashState *hashState, struct StringInfoData *buf) +{ + ListCell *lc; + AttrFilter *attr_filter; + SeqScanState *sss; + + if (!hashState->filters) + return; + + foreach (lc, hashState->filters) + { + attr_filter = lfirst(lc); + if (attr_filter->empty || attr_filter->hasnulls) + continue; + + if (IsA(attr_filter->target, SeqScanState)) + { + sss = castNode(SeqScanState, attr_filter->target); + appendStringInfo(buf, "RF: %s attrno: %d, range[%ld, %ld], n_distinct: %.2f\n", + RelationGetRelationName(sss->ss.ss_currentRelation), + attr_filter->lattno, (int64_t) attr_filter->min, + (int64_t) attr_filter->max, attr_filter->n_distinct); + } + } +} diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index c9debfb1fd0..404b71e7279 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -31,18 +31,24 @@ #include "access/relscan.h" #include "access/session.h" #include "access/tableam.h" +#include "catalog/pg_namespace.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" +#include "utils/datum.h" #include "utils/rel.h" #include "utils/builtins.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" #include "nodes/nodeFuncs.h" +#include "nodes/makefuncs.h" #include "cdb/cdbaocsam.h" #include "cdb/cdbappendonlyam.h" #include "cdb/cdbvars.h" static TupleTableSlot *SeqNext(SeqScanState *node); -static ScanKey ScanKeyListToArray(List *keys, int *num); +static List *ConvertScanKeysToQuals(List *scankeys, Index scanrelid); +static void ExecSeqScanExplainEnd(PlanState *planstate, struct StringInfoData *buf); /* ---------------------------------------------------------------- * Scan Support @@ -72,16 +78,22 @@ SeqNext(SeqScanState *node) if (scandesc == NULL) { - int nkeys = 0; - ScanKey keys = NULL; - /* * Just when gp_enable_runtime_filter_pushdown enabled and * node->filter_in_seqscan is false means scankey need to be pushed to * AM. */ - if (gp_enable_runtime_filter_pushdown && !node->filter_in_seqscan) - keys = ScanKeyListToArray(node->filters, &nkeys); + if (gp_enable_runtime_filter_pushdown && !node->filter_in_seqscan && + !estate->useMppParallelMode) + { + List *quals = NIL; + Scan *scan = (Scan *)node->ss.ps.plan; + quals = ConvertScanKeysToQuals(node->filters, scan->scanrelid); + if (quals) + { + node->ss.ps.plan->qual = list_concat(node->ss.ps.plan->qual, quals); + } + } /* * We reach here if the scan is not parallel, or if we're serially @@ -89,7 +101,7 @@ SeqNext(SeqScanState *node) */ scandesc = table_beginscan_es(node->ss.ss_currentRelation, estate->es_snapshot, - nkeys, keys, + 0, NULL, NULL, &node->ss.ps); node->ss.ss_currentScanDesc = scandesc; @@ -188,6 +200,14 @@ ExecInitSeqScanForPartition(SeqScan *node, EState *estate, scanstate->ss.ps.state = estate; scanstate->ss.ps.ExecProcNode = ExecSeqScan; + if (estate->es_instrument && (estate->es_instrument & INSTRUMENT_CDB)) + { + /* Allocate string buffer. */ + scanstate->ss.ps.cdbexplainbuf = makeStringInfo(); + + /* Request a callback at end of query. */ + scanstate->ss.ps.cdbexplainfun = ExecSeqScanExplainEnd; + } /* * Miscellaneous initialization * @@ -223,7 +243,7 @@ ExecInitSeqScanForPartition(SeqScan *node, EState *estate, if (gp_enable_runtime_filter_pushdown && !estate->useMppParallelMode) { - scanstate->filter_in_seqscan = true; + scanstate->filter_in_seqscan = !(table_scan_flags(currentRelation) & SCAN_SUPPORT_RUNTIME_FILTER); } return scanstate; @@ -440,28 +460,137 @@ PassByBloomFilter(PlanState *ps, List *filters, TupleTableSlot *slot) } /* - * Convert the list of ScanKey to the array, and append an emtpy ScanKey as - * the end flag of the array. + * ConvertScanKeysToQuals + * Convert a list of ScanKeys to a list of Expr nodes. + * + * This is used to convert the scan keys into a form that can be used as + * runtime filter qual. */ -static ScanKey -ScanKeyListToArray(List *keys, int *num) +static List * +ConvertScanKeysToQuals(List *scankeys, Index scanrelid) { - ScanKey sk; + List *result = NIL; + ListCell *lc; - if (list_length(keys) == 0) - return NULL; + foreach (lc, scankeys) + { + ScanKey skey = (ScanKey) lfirst(lc); + Expr *qual; + const char *opname; + Oid oproid; + int16 typlen; + bool typbyval; + /* support SK_BLOOM_FILTER by PassByBloomFilter */ + if (skey->sk_flags == SK_BLOOM_FILTER) + continue; + + /* Create Var node representing the scan attribute */ + Var *var = makeVar(scanrelid, /* ID of scan relation */ + skey->sk_attno, /* Attribute number */ + skey->sk_subtype, /* Attribute type */ + -1, /* Type modifier */ + skey->sk_collation, /* Collation */ + 0); /* Variable level */ + get_typlenbyval(skey->sk_subtype, &typlen, &typbyval); + /* Create Const node representing the comparison value */ + Const *constant = + makeConst(skey->sk_subtype, /* Constant type */ + -1, /* Type modifier */ + skey->sk_collation, /* Collation */ + typlen, /* Length */ + datumCopy(skey->sk_argument, typbyval, typlen), /* Value */ + false, /* Is null */ + typbyval); /* By value */ + + /* Determine operator name based on B-tree strategy number */ + switch (skey->sk_strategy) + { + case BTGreaterEqualStrategyNumber: + opname = ">="; + break; + case BTLessEqualStrategyNumber: + opname = "<="; + break; + default: + elog(ERROR, "unsupported strategy number: %d", + skey->sk_strategy); + } - Assert(num); - *num = list_length(keys); + /* Look up operator OID dynamically */ + oproid = get_operator(InvalidOid, skey->sk_subtype, skey->sk_subtype, + PG_CATALOG_NAMESPACE, opname, false); - sk = (ScanKey)palloc(sizeof(ScanKeyData) * (*num + 1)); - for (int i = 0; i < *num; ++i) - memcpy(&sk[i], list_nth(keys, i), sizeof(ScanKeyData)); - /* - * SK_EMPYT means the end of the array of the ScanKey - */ - sk[*num].sk_flags = SK_EMPYT; + /* Create operator expression node */ + OpExpr *opexpr = makeNode(OpExpr); + opexpr->opno = oproid; /* Operator OID */ + opexpr->opfuncid = get_opcode(oproid); /* Implementing function's OID */ + opexpr->opresulttype = BOOLOID; /* Result is always boolean */ + opexpr->opretset = false; /* Not a set returning operator */ + opexpr->opcollid = InvalidOid; /* No collation */ + opexpr->inputcollid = InvalidOid; /* No collation for input */ + + /* Add the comparison arguments */ + opexpr->args = list_make2(var, constant); - return sk; + qual = (Expr *) opexpr; + result = lappend(result, qual); + } + + /* If we have multiple conditions, combine them with AND */ + if (list_length(result) > 1) + { + BoolExpr *andExpr = makeNode(BoolExpr); + andExpr->boolop = AND_EXPR; + andExpr->args = result; + andExpr->location = -1; + return list_make1(andExpr); + } + + return result; +} + +static void +ExecSeqScanExplainEnd(PlanState *planstate, struct StringInfoData *buf) +{ + SeqScanState *scanstate = (SeqScanState *) planstate; + ListCell *lc; + int idx = 0; + Relation rel; + TupleDesc tupdesc; + + if (!scanstate->filters) + return; + + rel = scanstate->ss.ss_currentRelation; + tupdesc = RelationGetDescr(rel); + + foreach_with_count (lc, scanstate->filters, idx) + { + ScanKey sk = lfirst(lc); + if (sk->sk_flags == SK_BLOOM_FILTER && scanstate->filter_in_seqscan) + { + bloom_filter *bf = (bloom_filter *) DatumGetPointer(sk->sk_argument); + if (bf) + appendStringInfo(buf, "BloomFilter[%d]: fpr:%f ", idx, bloom_false_positive_rate(bf)); + } + if (sk->sk_flags == 0 && !scanstate->filter_in_seqscan) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, sk->sk_attno - 1); + + switch(sk->sk_strategy) + { + case BTGreaterEqualStrategyNumber: + appendStringInfo(buf, "ScanKey[%s] >= %ld, ", + NameStr(attr->attname), DatumGetInt64(sk->sk_argument)); + break; + case BTLessEqualStrategyNumber: + appendStringInfo(buf, "ScanKey[%s] <= %ld ", + NameStr(attr->attname), DatumGetInt64(sk->sk_argument)); + break; + default: + break; + } + } + } } diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 25cefe8bc91..c66a2d240b5 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -4566,7 +4566,49 @@ get_operator_opfamilies(Oid opno) ReleaseSysCacheList(catlist); return opfam_oids; -} +} + +Oid +get_operator(Oid opno, Oid oprleft, Oid oprright, Oid namespaceId, + const char *oprname, bool noError) +{ + HeapTuple tup; + Form_pg_operator oprform; + Oid result = InvalidOid; + + /* If opno is valid, just look up the operator directly */ + if (OidIsValid(opno)) + { + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (HeapTupleIsValid(tup)) + { + result = opno; + ReleaseSysCache(tup); + } + return result; + } + + /* Search for the operator based on name and argument types */ + tup = SearchSysCache4(OPERNAMENSP, PointerGetDatum(oprname), + ObjectIdGetDatum(oprleft), ObjectIdGetDatum(oprright), + ObjectIdGetDatum(namespaceId)); + + if (HeapTupleIsValid(tup)) + { + oprform = (Form_pg_operator) GETSTRUCT(tup); + result = oprform->oid; + ReleaseSysCache(tup); + } + else if (!noError) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("operator does not exist: %s(%s, %s)", oprname, + format_type_be(oprleft), format_type_be(oprright)))); + } + + return result; +} /* * get_index_opfamilies diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index df83e03b766..c116193893d 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -466,6 +466,8 @@ int gp_predicate_pushdown_sample_rows; bool gp_enable_runtime_filter_pushdown; +double gp_runtime_filter_selectivity_threshold; + bool enable_offload_entry_to_qe = false; bool enable_answer_query_using_materialized_views = false; bool aqumv_allow_foreign_table = false; @@ -4779,6 +4781,17 @@ struct config_real ConfigureNamesReal_gp[] = NULL, NULL, NULL }, + { + {"gp_runtime_filter_selectivity_threshold", PGC_USERSET, QUERY_TUNING_OTHER, + gettext_noop("The threshold of selectivity to optimize runtime filter pushdown."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_runtime_filter_selectivity_threshold, + 0.8, 0.0, DBL_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL diff --git a/src/include/executor/nodeRuntimeFilter.h b/src/include/executor/nodeRuntimeFilter.h index f2fb3401daf..070a6bcd02c 100644 --- a/src/include/executor/nodeRuntimeFilter.h +++ b/src/include/executor/nodeRuntimeFilter.h @@ -38,5 +38,6 @@ extern void RFAddTupleValues(RuntimeFilterState *rfstate, List *vals); extern void ExecInitRuntimeFilterFinish(RuntimeFilterState *node, double inner_rows); +extern void ExecRFExplainEnd(HashState *hashState, struct StringInfoData *buf); #endif /* NODERUNTIMEFILTER_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 63a4b212bfe..e4524b06555 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -3093,15 +3093,19 @@ typedef struct RuntimeFilterState typedef struct AttrFilter { bool empty; /* empty filter or not */ + bool hasnulls; /* has null value or not */ PlanState *target;/* the node in where runtime filter will be used, target will be seqscan, see FindTargetAttr(). in nodeHashjoin.c */ AttrNumber rattno; /* attr no in hash node */ AttrNumber lattno; /* if target is seqscan, attr no in relation */ + Oid vartype; /* type of the attribute */ + Oid collation; /* collation of the attribute */ bloom_filter *blm_filter; Datum min; Datum max; + float4 n_distinct; /* estimated number of distinct values */ } AttrFilter; /* ---------------- diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index de0f931c797..8a6468e67aa 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -656,6 +656,7 @@ extern bool gp_allow_date_field_width_5digits; * bloom filter for performance. */ extern bool gp_enable_runtime_filter_pushdown; +extern double gp_runtime_filter_selectivity_threshold; typedef enum { diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 51224853cef..cb36471ee89 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -264,6 +264,8 @@ extern Oid default_partition_opfamily_for_type(Oid typeoid); extern int get_relnatts(Oid relid); +extern Oid get_operator(Oid opno, Oid oprleft, Oid oprright, Oid namespaceId, const char *oprname,bool noError); + #define type_is_array(typid) (get_element_type(typid) != InvalidOid) /* type_is_array_domain accepts both plain arrays and domains over arrays */ #define type_is_array_domain(typid) (get_base_element_type(typid) != InvalidOid) diff --git a/src/include/utils/sync_guc_name.h b/src/include/utils/sync_guc_name.h index 7cecf083727..43045de9bb6 100644 --- a/src/include/utils/sync_guc_name.h +++ b/src/include/utils/sync_guc_name.h @@ -66,6 +66,7 @@ "gp_enable_interconnect_aggressive_retry", "gp_enable_runtime_filter", "gp_enable_runtime_filter_pushdown", + "gp_runtime_filter_selectivity_threshold", "gp_enable_segment_copy_checking", "gp_external_enable_filter_pushdown", "gp_hashjoin_tuples_per_bucket", diff --git a/src/test/regress/expected/gp_runtime_filter.out b/src/test/regress/expected/gp_runtime_filter.out index cb47862576c..e341b1933e0 100644 --- a/src/test/regress/expected/gp_runtime_filter.out +++ b/src/test/regress/expected/gp_runtime_filter.out @@ -294,19 +294,21 @@ SELECT t1.c3 FROM t1, t2 WHERE t1.c2 = t2.c2; SET gp_enable_runtime_filter_pushdown TO on; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT t1.c3 FROM t1, t2 WHERE t1.c2 = t2.c2; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) (actual rows=0 loops=1) -> Hash Join (actual rows=0 loops=1) Hash Cond: (t1.c2 = t2.c2) - Extra Text: (seg2) Hash chain length 8.0 avg, 8 max, using 4 of 524288 buckets. + Extra Text: (seg2) RF: t1 attrno: 2, range[1, 4], n_distinct: 1.00 + (seg2) Hash chain length 8.0 avg, 8 max, using 4 of 524288 buckets. -> Seq Scan on t1 (actual rows=1 loops=1) Rows Removed by Pushdown Runtime Filter: 127 + Extra Text: (seg2) BloomFilter[0]: fpr:0.000000, -> Hash (actual rows=32 loops=1) Buckets: 524288 Batches: 1 Memory Usage: 4098kB -> Seq Scan on t2 (actual rows=32 loops=1) Optimizer: Postgres query optimizer -(10 rows) +(12 rows) RESET gp_enable_runtime_filter_pushdown; DROP TABLE IF EXISTS t1; @@ -344,24 +346,28 @@ SELECT * FROM t1, t2 WHERE t1.c2 = t2.c2; SET gp_enable_runtime_filter_pushdown TO on; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t1, t2 WHERE t1.c2 = t2.c2; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (actual rows=96 loops=1) -> Hash Join (actual rows=64 loops=1) Hash Cond: (t1.c2 = t2.c2) - Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 6 of 524288 buckets. + Extra Text: (seg0) RF: t1_1_prt_1 attrno: 2, range[1, 51], n_distinct: 18.00 + (seg0) RF: t1_1_prt_2 attrno: 2, range[1, 51], n_distinct: 20.00 + (seg0) Hash chain length 1.0 avg, 1 max, using 6 of 524288 buckets. -> Append (actual rows=64 loops=1) Partition Selectors: $0 -> Seq Scan on t1_1_prt_1 t1_1 (actual rows=48 loops=1) Rows Removed by Pushdown Runtime Filter: 240 + Extra Text: (seg0) BloomFilter[0]: fpr:0.000000, -> Seq Scan on t1_1_prt_2 t1_2 (actual rows=16 loops=1) Rows Removed by Pushdown Runtime Filter: 304 + Extra Text: (seg0) BloomFilter[0]: fpr:0.000000, -> Hash (actual rows=6 loops=1) Buckets: 524288 Batches: 1 Memory Usage: 4097kB -> Partition Selector (selector id: $0) (actual rows=6 loops=1) -> Seq Scan on t2 (actual rows=6 loops=1) Optimizer: Postgres query optimizer -(15 rows) +(19 rows) RESET gp_enable_runtime_filter_pushdown; DROP TABLE IF EXISTS t1; @@ -396,16 +402,16 @@ INSERT INTO t1 SELECT GENERATE_SERIES(1, 1000), GENERATE_SERIES(1, 1000); INSERT INTO t2 SELECT * FROM t1; SET gp_enable_runtime_filter_pushdown TO on; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT count(t1.c2) FROM t1, t2 WHERE t1.c1 = t2.c1; - QUERY PLAN ---------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=1 loops=1) -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=1) -> Partial Aggregate (actual rows=1 loops=1) -> Hash Join (actual rows=340 loops=1) Hash Cond: (t1.c1 = t2.c1) - Extra Text: (seg2) Hash chain length 1.0 avg, 1 max, using 340 of 524288 buckets. + Extra Text: (seg2) RF: t1 attrno: 1, range[5, 997], n_distinct: 0.00 + (seg2) Hash chain length 1.0 avg, 1 max, using 340 of 524288 buckets. -> Seq Scan on t1 (actual rows=340 loops=1) - Rows Removed by Pushdown Runtime Filter: 0 -> Hash (actual rows=340 loops=1) Buckets: 524288 Batches: 1 Memory Usage: 4108kB -> Seq Scan on t2 (actual rows=340 loops=1) @@ -443,25 +449,27 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT t1.c3 FROM t1, t2 W -> Hash (actual rows=6 loops=1) Buckets: 524288 Batches: 1 Memory Usage: 4097kB -> Seq Scan on t2 (actual rows=6 loops=1) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (10 rows) SET gp_enable_runtime_filter_pushdown TO on; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT t1.c3 FROM t1, t2 WHERE t1.c1 = t2.c1; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) (actual rows=32 loops=1) -> Hash Join (actual rows=32 loops=1) Hash Cond: (t1.c1 = t2.c1) - Extra Text: (seg0) Hash chain length 2.0 avg, 2 max, using 3 of 524288 buckets. + Extra Text: (seg0) RF: t1 attrno: 1, range[2, 4], n_distinct: -0.12 + (seg0) Hash chain length 2.0 avg, 2 max, using 3 of 524288 buckets. -> Result (actual rows=16 loops=1) -> Seq Scan on t1 (actual rows=16 loops=1) Rows Removed by Pushdown Runtime Filter: 8 + Extra Text: (seg0) BloomFilter[0]: fpr:0.000000, ScanKey[c1] >= 2, ScanKey[c1] <= 4 -> Hash (actual rows=6 loops=1) Buckets: 524288 Batches: 1 Memory Usage: 4097kB -> Seq Scan on t2 (actual rows=6 loops=1) - Optimizer: Pivotal Optimizer (GPORCA) -(11 rows) + Optimizer: GPORCA +(13 rows) RESET gp_enable_runtime_filter_pushdown; DROP TABLE IF EXISTS t1; @@ -511,17 +519,20 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t1, t2, t3 W SET gp_enable_runtime_filter_pushdown TO on; EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t1, t2, t3 WHERE t1.c1 = t2.c1 AND t1.c2 = t3.c2; - QUERY PLAN ---------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) (actual rows=256 loops=1) -> Hash Join (actual rows=256 loops=1) Hash Cond: (t1.c1 = t2.c1) - Extra Text: Hash chain length 4.0 avg, 4 max, using 4 of 32768 buckets. + Extra Text: RF: t1 attrno: 1, range[1, 4], n_distinct: 3.00 + Hash chain length 4.0 avg, 4 max, using 4 of 32768 buckets. -> Hash Join (actual rows=64 loops=1) Hash Cond: (t1.c2 = t3.c2) - Extra Text: Hash chain length 2.0 avg, 2 max, using 4 of 32768 buckets. + Extra Text: RF: t1 attrno: 2, range[1, 4], n_distinct: 3.00 + Hash chain length 2.0 avg, 2 max, using 4 of 32768 buckets. -> Seq Scan on t1 (actual rows=32 loops=1) Rows Removed by Pushdown Runtime Filter: 16 + Extra Text: BloomFilter[0]: fpr:0.000000, BloomFilter[1]: fpr:0.000000, -> Hash (actual rows=8 loops=1) Buckets: 32768 Batches: 1 Memory Usage: 258kB -> Seq Scan on t3 (actual rows=8 loops=1) @@ -529,7 +540,7 @@ EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM t1, t2, t3 W Buckets: 32768 Batches: 1 Memory Usage: 260kB -> Seq Scan on t2 (actual rows=16 loops=1) Optimizer: Postgres query optimizer -(16 rows) +(19 rows) RESET gp_enable_runtime_filter_pushdown; DROP TABLE IF EXISTS t1; diff --git a/src/test/regress/expected/gp_runtimefilter_pushdown.out b/src/test/regress/expected/gp_runtimefilter_pushdown.out new file mode 100644 index 00000000000..1b62d7ab590 --- /dev/null +++ b/src/test/regress/expected/gp_runtimefilter_pushdown.out @@ -0,0 +1,359 @@ +SET optimizer TO on; +-- Test Suit 1: runtime filter main case +DROP TABLE IF EXISTS fact_rf, dim_rf; +CREATE TABLE fact_rf (fid int, did int, val int); +CREATE TABLE dim_rf (did int, proj_id int, filter_val int); +-- Generating data, fact_rd.did and dim_rf.did is 80% matched +INSERT INTO fact_rf SELECT i, i % 8000 + 1, i FROM generate_series(1, 100000) s(i); +INSERT INTO dim_rf SELECT i, i % 10, i FROM generate_series(1, 10000) s(i); +ANALYZE fact_rf, dim_rf; +SET gp_enable_runtime_filter_pushdown TO off; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1481 width=1) (actual time=4.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 21.191 ms + (slice0) Executor memory: 128K bytes. + (slice1) Executor memory: 4166K bytes avg x 3x(0) workers, 4166K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 113K bytes avg x 3x(0) workers, 113K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 13.893 ms +(20 rows) + +SET gp_enable_runtime_filter_pushdown TO on; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=12.001..12.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..12.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1481 width=1) (actual time=4.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) RF: fact_rf attrno: 2, range[1, 1000], n_distinct: -0.22 + (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + Extra Text: (seg0) BloomFilter[0]: fpr:0.000000, ScanKey[did] >= 1, ScanKey[did] <= 1000 + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=4.000..4.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..4.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..4.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 15.001 ms + (slice0) Executor memory: 128K bytes. + (slice1) Executor memory: 4400K bytes avg x 3x(0) workers, 4400K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 10.579 ms +(22 rows) + +-- Test bad filter rate +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 7; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..870.16 rows=1 width=8) (actual time=20.001..20.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..870.16 rows=1 width=8) (actual time=20.001..20.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..870.16 rows=1 width=8) (actual time=20.001..20.001 rows=1 loops=1) + -> Hash Join (cost=0.00..870.16 rows=29116 width=1) (actual time=4.000..20.001 rows=23492 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg2) Hash chain length 1.0 avg, 2 max, using 2285 of 524288 buckets. + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..432.51 rows=33334 width=4) (actual time=0.000..8.000 rows=33501 loops=1) + Hash Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..8.000 rows=33462 loops=1) + -> Hash (cost=431.23..431.23 rows=2334 width=4) (actual time=0.000..0.000 rows=2368 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4180kB + -> Seq Scan on dim_rf (cost=0.00..431.23 rows=2334 width=4) (actual time=0.000..0.000 rows=2368 loops=1) + Filter: (proj_id < 7) + Rows Removed by Filter: 1017 + Planning Time: 13.487 ms + (slice0) Executor memory: 126K bytes. + (slice1) Executor memory: 4252K bytes avg x 3x(0) workers, 4252K bytes max (seg0). Work_mem: 4180K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 19.065 ms +(21 rows) + +-- Test outer join +-- LeftJoin (eliminated and applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=12.001..12.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=12.001..12.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1481 width=1) (actual time=0.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) RF: fact_rf attrno: 2, range[1, 1000], n_distinct: -0.22 + (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + Extra Text: (seg0) BloomFilter[0]: fpr:0.000000, ScanKey[did] >= 1, ScanKey[did] <= 1000 + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 15.371 ms + (slice0) Executor memory: 128K bytes. + (slice1) Executor memory: 4296K bytes avg x 3x(0) workers, 4296K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 9.322 ms +(22 rows) + +-- LeftJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..876.97 rows=1 width=8) (actual time=20.001..20.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..876.97 rows=1 width=8) (actual time=20.001..20.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..876.97 rows=1 width=8) (actual time=20.001..20.001 rows=1 loops=1) + -> Result (cost=0.00..876.97 rows=32949 width=1) (actual time=0.000..20.001 rows=1040 loops=1) + Filter: ((dim_rf.proj_id IS NULL) OR ((dim_rf.proj_id < 2) AND (dim_rf.filter_val <= 1000))) + -> Hash Left Join (cost=0.00..872.63 rows=65945 width=8) (actual time=0.000..16.001 rows=33501 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) Hash chain length 1.0 avg, 2 max, using 3377 of 524288 buckets. + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..432.51 rows=33334 width=4) (actual time=0.000..4.000 rows=33501 loops=1) + Hash Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + -> Hash (cost=431.08..431.08 rows=3334 width=12) (actual time=0.000..0.000 rows=3385 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4242kB + -> Seq Scan on dim_rf (cost=0.00..431.08 rows=3334 width=12) (actual time=0.000..0.000 rows=3385 loops=1) + Planning Time: 15.243 ms + (slice0) Executor memory: 123K bytes. + (slice1) Executor memory: 4318K bytes avg x 3x(0) workers, 4318K bytes max (seg0). Work_mem: 4242K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 20.155 ms +(21 rows) + +-- RightJoin (applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.72 rows=1 width=8) (actual time=40.002..40.002 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.72 rows=1 width=8) (actual time=36.002..40.002 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.72 rows=1 width=8) (actual time=40.002..40.002 rows=1 loops=1) + -> Hash Right Join (cost=0.00..869.72 rows=1542 width=1) (actual time=4.000..40.002 rows=1040 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 80 of 524288 buckets. + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..432.51 rows=33334 width=4) (actual time=0.000..4.000 rows=33501 loops=1) + Hash Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + -> Hash (cost=431.31..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4099kB + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 17.055 ms + (slice0) Executor memory: 127K bytes. + (slice1) Executor memory: 4188K bytes avg x 3x(0) workers, 4188K bytes max (seg2). Work_mem: 4099K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 37.169 ms +(21 rows) + +-- SemiJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Semi Join (cost=0.00..869.37 rows=1481 width=1) (actual time=0.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) RF: fact_rf attrno: 2, range[1, 1000], n_distinct: -0.22 + (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..8.000 rows=33462 loops=1) + Extra Text: (seg0) BloomFilter[0]: fpr:0.000000, ScanKey[did] >= 1, ScanKey[did] <= 1000 + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..0.000 rows=200 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 24.928 ms + (slice0) Executor memory: 128K bytes. + (slice1) Executor memory: 4296K bytes avg x 3x(0) workers, 4296K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 9.801 ms +(22 rows) + +-- SemiJoin -> InnerJoin and deduplicate +EXPLAIN ANALYZE SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..868.29 rows=1 width=8) (actual time=16.001..16.001 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..868.29 rows=1 width=8) (actual time=16.001..16.001 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..868.29 rows=1 width=8) (actual time=16.001..16.001 rows=1 loops=1) + -> Hash Join (cost=0.00..868.29 rows=119 width=1) (actual time=12.001..16.001 rows=80 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg0) Hash chain length 1.0 avg, 1 max, using 80 of 131072 buckets. + -> HashAggregate (cost=0.00..436.49 rows=2672 width=4) (actual time=12.001..12.001 rows=2679 loops=1) + Group Key: fact_rf.did + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..436.16 rows=2672 width=4) (actual time=12.001..12.001 rows=7979 loops=1) + Hash Key: fact_rf.did + -> Streaming HashAggregate (cost=0.00..436.12 rows=2672 width=4) (actual time=12.001..12.001 rows=7943 loops=1) + Group Key: fact_rf.did + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..0.000 rows=33462 loops=1) + -> Hash (cost=431.31..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Buckets: 131072 Batches: 1 Memory Usage: 1027kB + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=80 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 22.586 ms + (slice0) Executor memory: 280K bytes. + (slice1) Executor memory: 1306K bytes avg x 3x(0) workers, 1306K bytes max (seg1). Work_mem: 1027K bytes max. + (slice2) Executor memory: 824K bytes avg x 3x(0) workers, 832K bytes max (seg2). Work_mem: 913K bytes max. + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 15.230 ms +(25 rows) + +-- Test correctness +SELECT * FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND dim_rf.filter_val = 1 + ORDER BY fid; + fid | did | val | did | proj_id | filter_val +-------+-----+-------+-----+---------+------------ + 8000 | 1 | 8000 | 1 | 1 | 1 + 16000 | 1 | 16000 | 1 | 1 | 1 + 24000 | 1 | 24000 | 1 | 1 | 1 + 32000 | 1 | 32000 | 1 | 1 | 1 + 40000 | 1 | 40000 | 1 | 1 | 1 + 48000 | 1 | 48000 | 1 | 1 | 1 + 56000 | 1 | 56000 | 1 | 1 | 1 + 64000 | 1 | 64000 | 1 | 1 | 1 + 72000 | 1 | 72000 | 1 | 1 | 1 + 80000 | 1 | 80000 | 1 | 1 | 1 + 88000 | 1 | 88000 | 1 | 1 | 1 + 96000 | 1 | 96000 | 1 | 1 | 1 +(12 rows) + +SELECT * FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE dim_rf.filter_val = 1 + ORDER BY fid; + fid | did | val | did | proj_id | filter_val +-------+-----+-------+-----+---------+------------ + 8000 | 1 | 8000 | 1 | 1 | 1 + 16000 | 1 | 16000 | 1 | 1 | 1 + 24000 | 1 | 24000 | 1 | 1 | 1 + 32000 | 1 | 32000 | 1 | 1 | 1 + 40000 | 1 | 40000 | 1 | 1 | 1 + 48000 | 1 | 48000 | 1 | 1 | 1 + 56000 | 1 | 56000 | 1 | 1 | 1 + 64000 | 1 | 64000 | 1 | 1 | 1 + 72000 | 1 | 72000 | 1 | 1 | 1 + 80000 | 1 | 80000 | 1 | 1 | 1 + 88000 | 1 | 88000 | 1 | 1 | 1 + 96000 | 1 | 96000 | 1 | 1 | 1 +(12 rows) + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + count +------- + 2599 +(1 row) + +SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + count +------- + 200 +(1 row) + +-- Test contain null values +INSERT INTO dim_rf VALUES (NULL,1, 1); +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=3 loops=1) + -> Partial Aggregate (cost=0.00..869.37 rows=1 width=8) (actual time=8.000..8.000 rows=1 loops=1) + -> Hash Join (cost=0.00..869.37 rows=1481 width=1) (actual time=4.000..8.000 rows=898 loops=1) + Hash Cond: (fact_rf.did = dim_rf.did) + Extra Text: (seg1) Hash chain length 1.0 avg, 1 max, using 200 of 524288 buckets. + -> Seq Scan on fact_rf (cost=0.00..431.84 rows=33334 width=4) (actual time=0.000..4.000 rows=33462 loops=1) + -> Hash (cost=431.33..431.33 rows=356 width=4) (actual time=4.000..4.000 rows=200 loops=1) + Buckets: 524288 Batches: 1 Memory Usage: 4104kB + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..431.33 rows=356 width=4) (actual time=0.000..4.000 rows=201 loops=1) + -> Seq Scan on dim_rf (cost=0.00..431.31 rows=119 width=4) (actual time=0.000..0.000 rows=81 loops=1) + Filter: ((proj_id < 2) AND (filter_val <= 1000)) + Rows Removed by Filter: 3288 + Planning Time: 14.607 ms + (slice0) Executor memory: 128K bytes. + (slice1) Executor memory: 4295K bytes avg x 3x(0) workers, 4295K bytes max (seg0). Work_mem: 4104K bytes max. + (slice2) Executor memory: 111K bytes avg x 3x(0) workers, 111K bytes max (seg0). + Memory used: 128000kB + Optimizer: GPORCA + Execution Time: 9.279 ms +(20 rows) + +SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + count +------- + 2599 +(1 row) + +-- Clean up: reset guc +SET gp_enable_runtime_filter_pushdown TO off; +RESET optimizer; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index f2f66e6cd6e..1f67a45dfa4 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -127,6 +127,7 @@ test: rle rle_delta dsp not_out_of_shmem_exit_slots create_am_gp # Cloudberry-specific tests test: cbdb_optimizer_test test: gp_runtime_filter +test: gp_runtimefilter_pushdown # Disabled tests. XXX: Why are these disabled? #test: olap_window diff --git a/src/test/regress/sql/gp_runtimefilter_pushdown.sql b/src/test/regress/sql/gp_runtimefilter_pushdown.sql new file mode 100644 index 00000000000..0fcbf84e0d8 --- /dev/null +++ b/src/test/regress/sql/gp_runtimefilter_pushdown.sql @@ -0,0 +1,85 @@ +SET optimizer TO on; +-- Test Suit 1: runtime filter main case +DROP TABLE IF EXISTS fact_rf, dim_rf; +CREATE TABLE fact_rf (fid int, did int, val int); +CREATE TABLE dim_rf (did int, proj_id int, filter_val int); + +-- Generating data, fact_rd.did and dim_rf.did is 80% matched +INSERT INTO fact_rf SELECT i, i % 8000 + 1, i FROM generate_series(1, 100000) s(i); +INSERT INTO dim_rf SELECT i, i % 10, i FROM generate_series(1, 10000) s(i); +ANALYZE fact_rf, dim_rf; + +SET gp_enable_runtime_filter_pushdown TO off; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + +SET gp_enable_runtime_filter_pushdown TO on; +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + +-- Test bad filter rate +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 7; + +-- Test outer join +-- LeftJoin (eliminated and applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +-- LeftJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + +-- RightJoin (applicatable) +EXPLAIN ANALYZE SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +-- SemiJoin +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + +-- SemiJoin -> InnerJoin and deduplicate +EXPLAIN ANALYZE SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + +-- Test correctness +SELECT * FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND dim_rf.filter_val = 1 + ORDER BY fid; + +SELECT * FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE dim_rf.filter_val = 1 + ORDER BY fid; + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +SELECT COUNT(*) FROM + fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000; + +SELECT COUNT(*) FROM + fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did + WHERE proj_id < 2 AND filter_val <= 1000; + +SELECT COUNT(*) FROM fact_rf + WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000); + +SELECT COUNT(*) FROM dim_rf + WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000; + +-- Test contain null values +INSERT INTO dim_rf VALUES (NULL,1, 1); +EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; +SELECT COUNT(*) FROM fact_rf, dim_rf + WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000; + +-- Clean up: reset guc +SET gp_enable_runtime_filter_pushdown TO off; +RESET optimizer; \ No newline at end of file