From 6e97d8b4c388283aeaad44765b203863291a58c7 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 11 Aug 2025 15:34:28 +0800 Subject: [PATCH] Add option to avoid additional generated EquivalenceCalss for RelabelType in cdb_pull_up_eclass. Fix issue https://github.com/apache/cloudberry/issues/1301 For partition tables, cdb_pull_up_eclass will call get_eclass_for_sort_expr to find an existing equivalence class it is a member of. It will fail and build a new single-member EquivalenceClass for it when there is RelabelType. That will cause cdbpath_match_preds_to_both_distkeys return false even the locus are compatible because the RestrictInfo's left_ec/right_ec and rel's distribution key's dk_eclasses are two different pointers. And unnecessary Motions will be added in that case. CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id) PARTITION BY RANGE (date) (START (date '2016-01-01') INCLUSIVE END (date '2016-01-02') EXCLUSIVE EVERY (INTERVAL '1 day')); CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id); EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id); QUERY PLAN ------------------------------------------------------------------ Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Hash Join Hash Cond: ((t1.id)::text = (t2.id)::text) -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: t1.id -> Seq Scan on t1_1_prt_1 t1 -> Hash -> Seq Scan on t2 Optimizer: Postgres query optimizer (10 rows) cdbpullup_findEclassInTargetList() will ignore the presence or absence of a RelabelType node atop either expr in determining whether an EC member expr matches a targetlist expr. But get_eclass_for_sort_expr() didn't handle additional RelabelType and a new EC may be generated. We should not disturb get_eclass_for_sort_expr() as it's used at many places and UPSTREAM didn't add RelabelType to EC for that case. If find a sub_distkeyexpr from cdbpullup_findEclassInTargetList and we know that there is RelabelType stripped inside, we don't need to try to fetch or generate new EC in get_eclass_for_sort_expr(). So, add options for cdb_pull_up_eclass to ignore RelabelType when needed. Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/cdb/cdbpathlocus.c | 8 +- src/backend/cdb/cdbpullup.c | 12 ++- src/backend/optimizer/path/pathkeys.c | 9 +- src/include/cdb/cdbpullup.h | 2 +- src/include/optimizer/paths.h | 3 +- src/test/regress/expected/partition_join.out | 95 ++++++++++++++++++++ src/test/regress/sql/partition_join.sql | 40 +++++++++ 7 files changed, 160 insertions(+), 9 deletions(-) diff --git a/src/backend/cdb/cdbpathlocus.c b/src/backend/cdb/cdbpathlocus.c index 04fba4b367a..29930085429 100644 --- a/src/backend/cdb/cdbpathlocus.c +++ b/src/backend/cdb/cdbpathlocus.c @@ -518,7 +518,8 @@ cdbpathlocus_from_subquery(struct PlannerInfo *root, rel->relids, usable_subtlist, new_vars, - -1 /* not used */); + -1 /* not used */, + false/* not used */); if (outer_ec) { outer_dk = makeNode(DistributionKey); @@ -589,7 +590,7 @@ cdbpathlocus_get_distkey_exprs(CdbPathLocus locus, EquivalenceClass *dk_eclass = (EquivalenceClass *) lfirst(ec_cell); item = cdbpullup_findEclassInTargetList(dk_eclass, targetlist, - distkey->dk_opfamily); + distkey->dk_opfamily, NULL); if (item) break; @@ -668,7 +669,8 @@ cdbpathlocus_pull_above_projection(struct PlannerInfo *root, relids, targetlist, newvarlist, - newrelid); + newrelid, + true /* ignore RelabelType */); if (new_ec) break; } diff --git a/src/backend/cdb/cdbpullup.c b/src/backend/cdb/cdbpullup.c index c6f243ae08a..a7aa6486856 100644 --- a/src/backend/cdb/cdbpullup.c +++ b/src/backend/cdb/cdbpullup.c @@ -241,7 +241,7 @@ cdbpullup_expr(Expr *expr, List *targetlist, List *newvarlist, Index newvarno) */ Expr * cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, - Oid hashOpFamily) + Oid hashOpFamily, bool *relabel_stripped) { ListCell *lc; @@ -276,7 +276,11 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, *------- */ while (IsA(key, RelabelType)) + { key = (Expr *) ((RelabelType *) key)->arg; + if(relabel_stripped && (!*relabel_stripped)) + *relabel_stripped = true; + } foreach(lc_tle, targetlist) { @@ -293,7 +297,11 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, /* ignore RelabelType nodes on both sides */ naked_tlexpr = tlexpr; while (naked_tlexpr && IsA(naked_tlexpr, RelabelType)) + { naked_tlexpr = (Node *) ((RelabelType *) naked_tlexpr)->arg; + if(relabel_stripped && (!*relabel_stripped)) + *relabel_stripped = true; + } if (IsA(key, Var)) { @@ -342,7 +350,7 @@ cdbpullup_truncatePathKeysForTargetList(List *pathkeys, List *targetlist) { PathKey *pk = (PathKey *) lfirst(lc); - if (!cdbpullup_findEclassInTargetList(pk->pk_eclass, targetlist, InvalidOid)) + if (!cdbpullup_findEclassInTargetList(pk->pk_eclass, targetlist, InvalidOid, NULL)) break; new_pathkeys = lappend(new_pathkeys, pk); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ece5f3cde82..48a60c137d3 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -1551,23 +1551,28 @@ cdb_pull_up_eclass(PlannerInfo *root, Relids relids, List *targetlist, List *newvarlist, - Index newrelid) + Index newrelid, + bool ignore_relabel) { Expr *sub_distkeyexpr; EquivalenceClass *outer_ec; Expr *newexpr = NULL; Index sortref = 0; + bool relabel_stripped = false; Assert(eclass); Assert(!newvarlist || list_length(newvarlist) == list_length(targetlist)); /* Find an expr that we can rewrite to use the projected columns. */ - sub_distkeyexpr = cdbpullup_findEclassInTargetList(eclass, targetlist, InvalidOid); + sub_distkeyexpr = cdbpullup_findEclassInTargetList(eclass, targetlist, InvalidOid, &relabel_stripped); /* Replace expr's Var nodes with new ones referencing the targetlist. */ if (sub_distkeyexpr) { + if (ignore_relabel && relabel_stripped) + return eclass; + newexpr = cdbpullup_expr(sub_distkeyexpr, targetlist, newvarlist, diff --git a/src/include/cdb/cdbpullup.h b/src/include/cdb/cdbpullup.h index 535b2987ce1..8c23a4a64f2 100644 --- a/src/include/cdb/cdbpullup.h +++ b/src/include/cdb/cdbpullup.h @@ -55,7 +55,7 @@ Expr * cdbpullup_expr(Expr *expr, List *targetlist, List *newvarlist, Index newvarno); -extern Expr *cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, Oid hashOpFamily); +extern Expr *cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, Oid hashOpFamily, bool *relabel_stripped); extern List *cdbpullup_truncatePathKeysForTargetList(List *pathkeys, List *targetlist); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9b890e95566..a90c654cea6 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -283,7 +283,8 @@ cdb_pull_up_eclass(PlannerInfo *root, Relids relids, List *targetlist, List *newvarlist, - Index newrelid); + Index newrelid, + bool ignore_relabel); extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 80bde64b3db..107cbc746e5 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -5700,3 +5700,98 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2 1 | 209 | 0009 | 1 | 209 | 0009 (8 rows) +-- +-- test issue https://github.com/apache/cloudberry/issues/1301 +-- +begin; +create table t_issue_1301_big( + id varchar(32), + t varchar(32) +) distributed by (id) +partition by range(t) +( +partition p1 start ('0') end ('5'), +partition p2 start ('5') end ('9999999999999999999') +); +create index idx_t_issue_1301_big_id on t_issue_1301_big(id); +insert into t_issue_1301_big select seq, seq from generate_series(1, 100000) as seq; +create table t_issue_1301_small( + id varchar(32), + t varchar(32) +) distributed by (id); +insert into t_issue_1301_small select seq*10000, seq*10000 from generate_series(1, 100) as seq; +set local optimizer = off; +set local enable_nestloop to on; +analyze t_issue_1301_big; +analyze t_issue_1301_small; +explain(costs off) select a.* from t_issue_1301_small a left join t_issue_1301_big b on a.id=b.id; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + -> Seq Scan on t_issue_1301_small a + -> Append + -> Index Only Scan using t_issue_1301_big_1_prt_p1_id_idx on t_issue_1301_big_1_prt_p1 b_1 + Index Cond: (id = (a.id)::text) + -> Index Only Scan using t_issue_1301_big_1_prt_p2_id_idx on t_issue_1301_big_1_prt_p2 b_2 + Index Cond: (id = (a.id)::text) + Optimizer: Postgres query optimizer +(9 rows) + +abort; +BEGIN; +CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) +(START (date '2016-01-01') INCLUSIVE END (date '2016-01-04') EXCLUSIVE EVERY (INTERVAL '1 day')); +CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id); +analyze t1; +analyze t2; +\d+ t1; + Partitioned table "public.t1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+-----------------------+-----------+----------+---------+----------+--------------+------------- + id | character varying(32) | | | | extended | | + date | date | | | | plain | | +Partition key: RANGE (date) +Partitions: t1_1_prt_1 FOR VALUES FROM ('01-01-2016') TO ('01-02-2016'), + t1_1_prt_2 FOR VALUES FROM ('01-02-2016') TO ('01-03-2016'), + t1_1_prt_3 FOR VALUES FROM ('01-03-2016') TO ('01-04-2016') +Distributed by: (id) + +\d+ t2; + Table "public.t2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+-----------------------+-----------+----------+---------+----------+--------------+------------- + id | character varying(32) | | | | extended | | +Distributed by: (id) + +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1_1_prt_1 JOIN t2 USING(id); + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((t1_1_prt_1.id)::text = (t2.id)::text) + -> Seq Scan on t1_1_prt_1 + -> Hash + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(8 rows) + +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id); + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((t1.id)::text = (t2.id)::text) + -> Append + -> Seq Scan on t1_1_prt_1 t1_1 + -> Seq Scan on t1_1_prt_2 t1_2 + -> Seq Scan on t1_1_prt_3 t1_3 + -> Hash + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(11 rows) + +ABORT; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 24baa458b1d..404f9242b16 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -1173,3 +1173,43 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.c = t2 EXPLAIN (COSTS OFF) SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b; SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b; + +-- +-- test issue https://github.com/apache/cloudberry/issues/1301 +-- +begin; +create table t_issue_1301_big( + id varchar(32), + t varchar(32) +) distributed by (id) +partition by range(t) +( +partition p1 start ('0') end ('5'), +partition p2 start ('5') end ('9999999999999999999') +); +create index idx_t_issue_1301_big_id on t_issue_1301_big(id); +insert into t_issue_1301_big select seq, seq from generate_series(1, 100000) as seq; +create table t_issue_1301_small( + id varchar(32), + t varchar(32) +) distributed by (id); +insert into t_issue_1301_small select seq*10000, seq*10000 from generate_series(1, 100) as seq; +set local optimizer = off; +set local enable_nestloop to on; +analyze t_issue_1301_big; +analyze t_issue_1301_small; +explain(costs off) select a.* from t_issue_1301_small a left join t_issue_1301_big b on a.id=b.id; +abort; + +BEGIN; +CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) +(START (date '2016-01-01') INCLUSIVE END (date '2016-01-04') EXCLUSIVE EVERY (INTERVAL '1 day')); +CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id); +analyze t1; +analyze t2; +\d+ t1; +\d+ t2; +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1_1_prt_1 JOIN t2 USING(id); +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id); +ABORT;