Skip to content

Commit e16fccc

Browse files
committed
PaxAccessMethod::ScanFlags now supports SCAN_SUPPORT_RUNTIME_FILTER.
Add runtime_filter test with minmax.
1 parent 85be9d0 commit e16fccc

File tree

7 files changed

+465
-5
lines changed

7 files changed

+465
-5
lines changed

contrib/pax_storage/expected/runtime_filter.out

Lines changed: 365 additions & 0 deletions
Large diffs are not rendered by default.

contrib/pax_storage/pax_schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,6 @@ test: dictionary_encoding
2020

2121
test: cluster
2222
test: db_size_functions
23+
test: runtime_filter
2324

2425
test: teardown
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
SET optimizer TO on;
2+
3+
-- Test Suite 1: runtime filter main case
4+
DROP TABLE IF EXISTS fact_rf, dim_rf;
5+
CREATE TABLE fact_rf (fid int, did int, val int) using pax WITH(minmax_columns='fid,did,val');
6+
CREATE TABLE dim_rf (did int, proj_id int, filter_val int) using pax WITH(minmax_columns='did,proj_id,filter_val');
7+
8+
-- Generating data: fact_rf.did and dim_rf.did are 80% matched
9+
INSERT INTO fact_rf SELECT i, i % 8000 + 1, i FROM generate_series(1, 100000) s(i);
10+
INSERT INTO dim_rf SELECT i, i % 10, i FROM generate_series(1, 10000) s(i);
11+
ANALYZE fact_rf, dim_rf;
12+
13+
SET gp_enable_runtime_filter_pushdown TO off;
14+
EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf
15+
WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000;
16+
17+
SET gp_enable_runtime_filter_pushdown TO on;
18+
EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf
19+
WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000;
20+
21+
-- Test bad filter rate
22+
EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf
23+
WHERE fact_rf.did = dim_rf.did AND proj_id < 7;
24+
25+
-- Test outer join
26+
-- LeftJoin (eliminated and applicable)
27+
EXPLAIN ANALYZE SELECT COUNT(*) FROM
28+
fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did
29+
WHERE proj_id < 2 AND filter_val <= 1000;
30+
31+
-- LeftJoin
32+
EXPLAIN ANALYZE SELECT COUNT(*) FROM
33+
fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did
34+
WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000;
35+
36+
-- RightJoin (applicable)
37+
EXPLAIN ANALYZE SELECT COUNT(*) FROM
38+
fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did
39+
WHERE proj_id < 2 AND filter_val <= 1000;
40+
41+
-- SemiJoin
42+
EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf
43+
WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000);
44+
45+
-- SemiJoin -> InnerJoin and deduplicate
46+
EXPLAIN ANALYZE SELECT COUNT(*) FROM dim_rf
47+
WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000;
48+
49+
-- Test correctness
50+
SELECT * FROM fact_rf, dim_rf
51+
WHERE fact_rf.did = dim_rf.did AND dim_rf.filter_val = 1
52+
ORDER BY fid;
53+
54+
SELECT * FROM
55+
fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did
56+
WHERE dim_rf.filter_val = 1
57+
ORDER BY fid;
58+
59+
SELECT COUNT(*) FROM
60+
fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did
61+
WHERE proj_id < 2 AND filter_val <= 1000;
62+
63+
SELECT COUNT(*) FROM
64+
fact_rf LEFT JOIN dim_rf ON fact_rf.did = dim_rf.did
65+
WHERE proj_id IS NULL OR proj_id < 2 AND filter_val <= 1000;
66+
67+
SELECT COUNT(*) FROM
68+
fact_rf RIGHT JOIN dim_rf ON fact_rf.did = dim_rf.did
69+
WHERE proj_id < 2 AND filter_val <= 1000;
70+
71+
SELECT COUNT(*) FROM fact_rf
72+
WHERE fact_rf.did IN (SELECT did FROM dim_rf WHERE proj_id < 2 AND filter_val <= 1000);
73+
74+
SELECT COUNT(*) FROM dim_rf
75+
WHERE dim_rf.did IN (SELECT did FROM fact_rf) AND proj_id < 2 AND filter_val <= 1000;
76+
77+
-- Test join keys containing NULL values
78+
INSERT INTO dim_rf VALUES (NULL,1, 1);
79+
EXPLAIN ANALYZE SELECT COUNT(*) FROM fact_rf, dim_rf
80+
WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000;
81+
SELECT COUNT(*) FROM fact_rf, dim_rf
82+
WHERE fact_rf.did = dim_rf.did AND proj_id < 2 AND filter_val <= 1000;
83+
84+
-- Clean up: reset guc
85+
SET gp_enable_runtime_filter_pushdown TO off;
86+
RESET optimizer;

contrib/pax_storage/src/cpp/access/pax_access_handle.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ const TupleTableSlotOps *PaxAccessMethod::SlotCallbacks(
443443

444444
uint32 PaxAccessMethod::ScanFlags(Relation relation) {
445445
uint32 flags = 0;
446+
std::vector<int> minmax_columns;
446447
#ifdef VEC_BUILD
447448
flags |= SCAN_SUPPORT_VECTORIZATION | SCAN_SUPPORT_COLUMN_ORIENTED_SCAN;
448449
#else
@@ -452,6 +453,10 @@ uint32 PaxAccessMethod::ScanFlags(Relation relation) {
452453
#if defined(USE_MANIFEST_API) && !defined(USE_PAX_CATALOG)
453454
flags |= SCAN_FORCE_BIG_WRITE_LOCK;
454455
#endif
456+
minmax_columns = cbdb::GetMinMaxColumnIndexes(relation);
457+
if (!minmax_columns.empty()) {
458+
flags |= SCAN_SUPPORT_RUNTIME_FILTER;
459+
}
455460

456461
return flags;
457462
}

src/backend/executor/nodeHash.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4169,7 +4169,6 @@ PushdownRuntimeFilter(HashState *node)
41694169
!IsA(attr_filter->target, DynamicSeqScanState)))
41704170
continue;
41714171

4172-
SeqScanState *sss = castNode(SeqScanState, attr_filter->target);
41734172
/* bloom filter */
41744173
sk = (ScanKey)palloc0(sizeof(ScanKeyData));
41754174
sk->sk_flags = SK_BLOOM_FILTER;
@@ -4181,6 +4180,7 @@ PushdownRuntimeFilter(HashState *node)
41814180

41824181
if (attr_filter->n_distinct > 0)
41834182
{
4183+
SeqScanState *sss = castNode(SeqScanState, attr_filter->target);
41844184
int64 range = attr_filter->max - attr_filter->min + 1;
41854185
if ((range / attr_filter->n_distinct) > gp_runtime_filter_selectivity_threshold)
41864186
{

src/backend/executor/nodeHashjoin.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2197,8 +2197,8 @@ CreateRuntimeFilter(HashJoinState* hjstate)
21972197
AttrFilter *attr_filter;
21982198
ListCell *lc;
21992199
List *targets;
2200-
Oid var_type;
2201-
Oid collation;
2200+
Oid var_type = InvalidOid;
2201+
Oid collation = InvalidOid;
22022202

22032203
/*
22042204
* A build-side Bloom filter tells us if a row is definitely not in the build

src/backend/executor/nodeRuntimeFilter.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,10 +347,13 @@ ExecRFExplainEnd(HashState *hashState, struct StringInfoData *buf)
347347
if (attr_filter->empty || attr_filter->hasnulls)
348348
continue;
349349

350-
sss = castNode(SeqScanState, attr_filter->target);
351-
appendStringInfo(buf, "RF: %s attrno: %d, range[%ld, %ld], n_distinct: %.2f\n",
350+
if (IsA(attr_filter->target, SeqScanState))
351+
{
352+
sss = castNode(SeqScanState, attr_filter->target);
353+
appendStringInfo(buf, "RF: %s attrno: %d, range[%ld, %ld], n_distinct: %.2f\n",
352354
RelationGetRelationName(sss->ss.ss_currentRelation),
353355
attr_filter->lattno, (int64_t) attr_filter->min,
354356
(int64_t) attr_filter->max, attr_filter->n_distinct);
357+
}
355358
}
356359
}

0 commit comments

Comments
 (0)