diff --git a/src/backend/cdb/cdbpathlocus.c b/src/backend/cdb/cdbpathlocus.c index 04fba4b367a..29930085429 100644 --- a/src/backend/cdb/cdbpathlocus.c +++ b/src/backend/cdb/cdbpathlocus.c @@ -518,7 +518,8 @@ cdbpathlocus_from_subquery(struct PlannerInfo *root, rel->relids, usable_subtlist, new_vars, - -1 /* not used */); + -1 /* not used */, + false/* not used */); if (outer_ec) { outer_dk = makeNode(DistributionKey); @@ -589,7 +590,7 @@ cdbpathlocus_get_distkey_exprs(CdbPathLocus locus, EquivalenceClass *dk_eclass = (EquivalenceClass *) lfirst(ec_cell); item = cdbpullup_findEclassInTargetList(dk_eclass, targetlist, - distkey->dk_opfamily); + distkey->dk_opfamily, NULL); if (item) break; @@ -668,7 +669,8 @@ cdbpathlocus_pull_above_projection(struct PlannerInfo *root, relids, targetlist, newvarlist, - newrelid); + newrelid, + true /* ignore RelabelType */); if (new_ec) break; } diff --git a/src/backend/cdb/cdbpullup.c b/src/backend/cdb/cdbpullup.c index c6f243ae08a..a7aa6486856 100644 --- a/src/backend/cdb/cdbpullup.c +++ b/src/backend/cdb/cdbpullup.c @@ -241,7 +241,7 @@ cdbpullup_expr(Expr *expr, List *targetlist, List *newvarlist, Index newvarno) */ Expr * cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, - Oid hashOpFamily) + Oid hashOpFamily, bool *relabel_stripped) { ListCell *lc; @@ -276,7 +276,11 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, *------- */ while (IsA(key, RelabelType)) + { key = (Expr *) ((RelabelType *) key)->arg; + if(relabel_stripped && (!*relabel_stripped)) + *relabel_stripped = true; + } foreach(lc_tle, targetlist) { @@ -293,7 +297,11 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, /* ignore RelabelType nodes on both sides */ naked_tlexpr = tlexpr; while (naked_tlexpr && IsA(naked_tlexpr, RelabelType)) + { naked_tlexpr = (Node *) ((RelabelType *) naked_tlexpr)->arg; + if(relabel_stripped && (!*relabel_stripped)) + *relabel_stripped = true; + } if (IsA(key, Var)) { @@ -342,7 +350,7 @@ cdbpullup_truncatePathKeysForTargetList(List *pathkeys, List *targetlist) { PathKey *pk = (PathKey *) lfirst(lc); - if (!cdbpullup_findEclassInTargetList(pk->pk_eclass, targetlist, InvalidOid)) + if (!cdbpullup_findEclassInTargetList(pk->pk_eclass, targetlist, InvalidOid, NULL)) break; new_pathkeys = lappend(new_pathkeys, pk); diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ece5f3cde82..48a60c137d3 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -1551,23 +1551,28 @@ cdb_pull_up_eclass(PlannerInfo *root, Relids relids, List *targetlist, List *newvarlist, - Index newrelid) + Index newrelid, + bool ignore_relabel) { Expr *sub_distkeyexpr; EquivalenceClass *outer_ec; Expr *newexpr = NULL; Index sortref = 0; + bool relabel_stripped = false; Assert(eclass); Assert(!newvarlist || list_length(newvarlist) == list_length(targetlist)); /* Find an expr that we can rewrite to use the projected columns. */ - sub_distkeyexpr = cdbpullup_findEclassInTargetList(eclass, targetlist, InvalidOid); + sub_distkeyexpr = cdbpullup_findEclassInTargetList(eclass, targetlist, InvalidOid, &relabel_stripped); /* Replace expr's Var nodes with new ones referencing the targetlist. */ if (sub_distkeyexpr) { + if (ignore_relabel && relabel_stripped) + return eclass; + newexpr = cdbpullup_expr(sub_distkeyexpr, targetlist, newvarlist, diff --git a/src/include/cdb/cdbpullup.h b/src/include/cdb/cdbpullup.h index 535b2987ce1..8c23a4a64f2 100644 --- a/src/include/cdb/cdbpullup.h +++ b/src/include/cdb/cdbpullup.h @@ -55,7 +55,7 @@ Expr * cdbpullup_expr(Expr *expr, List *targetlist, List *newvarlist, Index newvarno); -extern Expr *cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, Oid hashOpFamily); +extern Expr *cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, Oid hashOpFamily, bool *relabel_stripped); extern List *cdbpullup_truncatePathKeysForTargetList(List *pathkeys, List *targetlist); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9b890e95566..a90c654cea6 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -283,7 +283,8 @@ cdb_pull_up_eclass(PlannerInfo *root, Relids relids, List *targetlist, List *newvarlist, - Index newrelid); + Index newrelid, + bool ignore_relabel); extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 80bde64b3db..107cbc746e5 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -5700,3 +5700,98 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2 1 | 209 | 0009 | 1 | 209 | 0009 (8 rows) +-- +-- test issue https://github.com/apache/cloudberry/issues/1301 +-- +begin; +create table t_issue_1301_big( + id varchar(32), + t varchar(32) +) distributed by (id) +partition by range(t) +( +partition p1 start ('0') end ('5'), +partition p2 start ('5') end ('9999999999999999999') +); +create index idx_t_issue_1301_big_id on t_issue_1301_big(id); +insert into t_issue_1301_big select seq, seq from generate_series(1, 100000) as seq; +create table t_issue_1301_small( + id varchar(32), + t varchar(32) +) distributed by (id); +insert into t_issue_1301_small select seq*10000, seq*10000 from generate_series(1, 100) as seq; +set local optimizer = off; +set local enable_nestloop to on; +analyze t_issue_1301_big; +analyze t_issue_1301_small; +explain(costs off) select a.* from t_issue_1301_small a left join t_issue_1301_big b on a.id=b.id; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Nested Loop Left Join + -> Seq Scan on t_issue_1301_small a + -> Append + -> Index Only Scan using t_issue_1301_big_1_prt_p1_id_idx on t_issue_1301_big_1_prt_p1 b_1 + Index Cond: (id = (a.id)::text) + -> Index Only Scan using t_issue_1301_big_1_prt_p2_id_idx on t_issue_1301_big_1_prt_p2 b_2 + Index Cond: (id = (a.id)::text) + Optimizer: Postgres query optimizer +(9 rows) + +abort; +BEGIN; +CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) +(START (date '2016-01-01') INCLUSIVE END (date '2016-01-04') EXCLUSIVE EVERY (INTERVAL '1 day')); +CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id); +analyze t1; +analyze t2; +\d+ t1; + Partitioned table "public.t1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+-----------------------+-----------+----------+---------+----------+--------------+------------- + id | character varying(32) | | | | extended | | + date | date | | | | plain | | +Partition key: RANGE (date) +Partitions: t1_1_prt_1 FOR VALUES FROM ('01-01-2016') TO ('01-02-2016'), + t1_1_prt_2 FOR VALUES FROM ('01-02-2016') TO ('01-03-2016'), + t1_1_prt_3 FOR VALUES FROM ('01-03-2016') TO ('01-04-2016') +Distributed by: (id) + +\d+ t2; + Table "public.t2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+-----------------------+-----------+----------+---------+----------+--------------+------------- + id | character varying(32) | | | | extended | | +Distributed by: (id) + +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1_1_prt_1 JOIN t2 USING(id); + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((t1_1_prt_1.id)::text = (t2.id)::text) + -> Seq Scan on t1_1_prt_1 + -> Hash + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(8 rows) + +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id); + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: ((t1.id)::text = (t2.id)::text) + -> Append + -> Seq Scan on t1_1_prt_1 t1_1 + -> Seq Scan on t1_1_prt_2 t1_2 + -> Seq Scan on t1_1_prt_3 t1_3 + -> Hash + -> Seq Scan on t2 + Optimizer: Postgres query optimizer +(11 rows) + +ABORT; diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 24baa458b1d..404f9242b16 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -1173,3 +1173,43 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.c = t2 EXPLAIN (COSTS OFF) SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b; SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b; + +-- +-- test issue https://github.com/apache/cloudberry/issues/1301 +-- +begin; +create table t_issue_1301_big( + id varchar(32), + t varchar(32) +) distributed by (id) +partition by range(t) +( +partition p1 start ('0') end ('5'), +partition p2 start ('5') end ('9999999999999999999') +); +create index idx_t_issue_1301_big_id on t_issue_1301_big(id); +insert into t_issue_1301_big select seq, seq from generate_series(1, 100000) as seq; +create table t_issue_1301_small( + id varchar(32), + t varchar(32) +) distributed by (id); +insert into t_issue_1301_small select seq*10000, seq*10000 from generate_series(1, 100) as seq; +set local optimizer = off; +set local enable_nestloop to on; +analyze t_issue_1301_big; +analyze t_issue_1301_small; +explain(costs off) select a.* from t_issue_1301_small a left join t_issue_1301_big b on a.id=b.id; +abort; + +BEGIN; +CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id) +PARTITION BY RANGE (date) +(START (date '2016-01-01') INCLUSIVE END (date '2016-01-04') EXCLUSIVE EVERY (INTERVAL '1 day')); +CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id); +analyze t1; +analyze t2; +\d+ t1; +\d+ t2; +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1_1_prt_1 JOIN t2 USING(id); +EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id); +ABORT;