apache · avamingli · Aug 13, 2025 · Aug 11, 2025
diff --git a/src/backend/cdb/cdbpathlocus.c b/src/backend/cdb/cdbpathlocus.c
@@ -518,7 +518,8 @@ cdbpathlocus_from_subquery(struct PlannerInfo *root,
 											  rel->relids,
 											  usable_subtlist,
 											  new_vars,
-											  -1 /* not used */);
+											  -1 /* not used */,
+											  false/* not used */);
 				if (outer_ec)
 				{
 					outer_dk = makeNode(DistributionKey);
@@ -589,7 +590,7 @@ cdbpathlocus_get_distkey_exprs(CdbPathLocus locus,
 				EquivalenceClass *dk_eclass = (EquivalenceClass *) lfirst(ec_cell);
 
 				item = cdbpullup_findEclassInTargetList(dk_eclass, targetlist,
-														distkey->dk_opfamily);
+														distkey->dk_opfamily, NULL);
 
 				if (item)
 					break;
@@ -668,7 +669,8 @@ cdbpathlocus_pull_above_projection(struct PlannerInfo *root,
 											relids,
 											targetlist,
 											newvarlist,
-											newrelid);
+											newrelid,
+											true /* ignore RelabelType */);
 				if (new_ec)
 					break;
 			}

diff --git a/src/backend/cdb/cdbpullup.c b/src/backend/cdb/cdbpullup.c
@@ -241,7 +241,7 @@ cdbpullup_expr(Expr *expr, List *targetlist, List *newvarlist, Index newvarno)
  */
 Expr *
 cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist,
-								 Oid hashOpFamily)
+								 Oid hashOpFamily, bool *relabel_stripped)
 {
 	ListCell   *lc;
 
@@ -276,7 +276,11 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist,
 		 *-------
 		 */
 		while (IsA(key, RelabelType))
+		{
 			key = (Expr *) ((RelabelType *) key)->arg;
+			if(relabel_stripped && (!*relabel_stripped))
+				*relabel_stripped = true;
+		}
 
 		foreach(lc_tle, targetlist)
 		{
@@ -293,7 +297,11 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist,
 			/* ignore RelabelType nodes on both sides */
 			naked_tlexpr = tlexpr;
 			while (naked_tlexpr && IsA(naked_tlexpr, RelabelType))
+			{
 				naked_tlexpr = (Node *) ((RelabelType *) naked_tlexpr)->arg;
+				if(relabel_stripped && (!*relabel_stripped))
+					*relabel_stripped = true;
+			}
 
 			if (IsA(key, Var))
 			{
@@ -342,7 +350,7 @@ cdbpullup_truncatePathKeysForTargetList(List *pathkeys, List *targetlist)
 	{
 		PathKey	   *pk = (PathKey *) lfirst(lc);
 
-		if (!cdbpullup_findEclassInTargetList(pk->pk_eclass, targetlist, InvalidOid))
+		if (!cdbpullup_findEclassInTargetList(pk->pk_eclass, targetlist, InvalidOid, NULL))
 			break;
 
 		new_pathkeys = lappend(new_pathkeys, pk);

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
@@ -1551,23 +1551,28 @@ cdb_pull_up_eclass(PlannerInfo *root,
 				   Relids relids,
 				   List *targetlist,
 				   List *newvarlist,
-				   Index newrelid)
+				   Index newrelid,
+				   bool ignore_relabel)
 {
 	Expr	   *sub_distkeyexpr;
 	EquivalenceClass *outer_ec;
 	Expr	   *newexpr = NULL;
 	Index sortref = 0;
+	bool	relabel_stripped = false;
 
 	Assert(eclass);
 	Assert(!newvarlist ||
 		   list_length(newvarlist) == list_length(targetlist));
 
 	/* Find an expr that we can rewrite to use the projected columns. */
-	sub_distkeyexpr = cdbpullup_findEclassInTargetList(eclass, targetlist, InvalidOid);
+	sub_distkeyexpr = cdbpullup_findEclassInTargetList(eclass, targetlist, InvalidOid, &relabel_stripped);
 
 	/* Replace expr's Var nodes with new ones referencing the targetlist. */
 	if (sub_distkeyexpr)
 	{
+		if (ignore_relabel && relabel_stripped)
+			return eclass;
+
 		newexpr = cdbpullup_expr(sub_distkeyexpr,
 								 targetlist,
 								 newvarlist,

diff --git a/src/include/cdb/cdbpullup.h b/src/include/cdb/cdbpullup.h
@@ -55,7 +55,7 @@
 Expr *
 cdbpullup_expr(Expr *expr, List *targetlist, List *newvarlist, Index newvarno);
 
-extern Expr *cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, Oid hashOpFamily);
+extern Expr *cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, Oid hashOpFamily, bool *relabel_stripped);
 
 extern List *cdbpullup_truncatePathKeysForTargetList(List *pathkeys, List *targetlist);
 

diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
@@ -283,7 +283,8 @@ cdb_pull_up_eclass(PlannerInfo    *root,
                     Relids          relids,
                     List           *targetlist,
                     List           *newvarlist,
-                    Index           newrelid);
+                    Index           newrelid,
+					bool			ignore_relabel);
 
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 										   List *sortclauses,

diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
@@ -5700,3 +5700,98 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
   1 | 209 | 0009 |  1 | 209 | 0009
 (8 rows)
 
+--
+-- test issue https://github.com/apache/cloudberry/issues/1301
+--
+begin;
+create table t_issue_1301_big(
+	id varchar(32),
+	t varchar(32)
+) distributed by (id)
+partition by range(t)
+(
+partition p1 start ('0') end ('5'),
+partition p2 start ('5') end ('9999999999999999999')
+);
+create index idx_t_issue_1301_big_id on t_issue_1301_big(id);
+insert into t_issue_1301_big select seq, seq from generate_series(1, 100000) as seq;
+create table t_issue_1301_small(
+  id varchar(32),
+  t varchar(32)
+) distributed by (id);
+insert into t_issue_1301_small select seq*10000, seq*10000 from generate_series(1, 100) as seq;
+set local optimizer = off;
+set local enable_nestloop to on;
+analyze t_issue_1301_big;
+analyze t_issue_1301_small;
+explain(costs off) select a.* from t_issue_1301_small a left join t_issue_1301_big b on a.id=b.id;
+                                                QUERY PLAN                                                 
+-----------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Nested Loop Left Join
+         ->  Seq Scan on t_issue_1301_small a
+         ->  Append
+               ->  Index Only Scan using t_issue_1301_big_1_prt_p1_id_idx on t_issue_1301_big_1_prt_p1 b_1
+                     Index Cond: (id = (a.id)::text)
+               ->  Index Only Scan using t_issue_1301_big_1_prt_p2_id_idx on t_issue_1301_big_1_prt_p2 b_2
+                     Index Cond: (id = (a.id)::text)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+abort;
+BEGIN;
+CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id)
+PARTITION BY RANGE (date)
+(START (date '2016-01-01') INCLUSIVE END (date '2016-01-04') EXCLUSIVE EVERY (INTERVAL '1 day'));
+CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id);
+analyze t1;
+analyze t2;
+\d+ t1;
+                                      Partitioned table "public.t1"
+ Column |         Type          | Collation | Nullable | Default | Storage  | Stats target | Description 
+--------+-----------------------+-----------+----------+---------+----------+--------------+-------------
+ id     | character varying(32) |           |          |         | extended |              | 
+ date   | date                  |           |          |         | plain    |              | 
+Partition key: RANGE (date)
+Partitions: t1_1_prt_1 FOR VALUES FROM ('01-01-2016') TO ('01-02-2016'),
+            t1_1_prt_2 FOR VALUES FROM ('01-02-2016') TO ('01-03-2016'),
+            t1_1_prt_3 FOR VALUES FROM ('01-03-2016') TO ('01-04-2016')
+Distributed by: (id)
+
+\d+ t2;
+                                            Table "public.t2"
+ Column |         Type          | Collation | Nullable | Default | Storage  | Stats target | Description 
+--------+-----------------------+-----------+----------+---------+----------+--------------+-------------
+ id     | character varying(32) |           |          |         | extended |              | 
+Distributed by: (id)
+
+EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1_1_prt_1 JOIN t2 USING(id);
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Aggregate
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         ->  Hash Join
+               Hash Cond: ((t1_1_prt_1.id)::text = (t2.id)::text)
+               ->  Seq Scan on t1_1_prt_1
+               ->  Hash
+                     ->  Seq Scan on t2
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id);
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Aggregate
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         ->  Hash Join
+               Hash Cond: ((t1.id)::text = (t2.id)::text)
+               ->  Append
+                     ->  Seq Scan on t1_1_prt_1 t1_1
+                     ->  Seq Scan on t1_1_prt_2 t1_2
+                     ->  Seq Scan on t1_1_prt_3 t1_3
+               ->  Hash
+                     ->  Seq Scan on t2
+ Optimizer: Postgres query optimizer
+(11 rows)
+
+ABORT;
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
@@ -1173,3 +1173,43 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.c = t2
 EXPLAIN (COSTS OFF)
 SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;
 SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;
+
+--
+-- test issue https://github.com/apache/cloudberry/issues/1301
+--
+begin;
+create table t_issue_1301_big(
+	id varchar(32),
+	t varchar(32)
+) distributed by (id)
+partition by range(t)
+(
+partition p1 start ('0') end ('5'),
+partition p2 start ('5') end ('9999999999999999999')
+);
+create index idx_t_issue_1301_big_id on t_issue_1301_big(id);
+insert into t_issue_1301_big select seq, seq from generate_series(1, 100000) as seq;
+create table t_issue_1301_small(
+  id varchar(32),
+  t varchar(32)
+) distributed by (id);
+insert into t_issue_1301_small select seq*10000, seq*10000 from generate_series(1, 100) as seq;
+set local optimizer = off;
+set local enable_nestloop to on;
+analyze t_issue_1301_big;
+analyze t_issue_1301_small;
+explain(costs off) select a.* from t_issue_1301_small a left join t_issue_1301_big b on a.id=b.id;
+abort;
+
+BEGIN;
+CREATE TABLE t1 (id varchar(32), date date) DISTRIBUTED BY (id)
+PARTITION BY RANGE (date)
+(START (date '2016-01-01') INCLUSIVE END (date '2016-01-04') EXCLUSIVE EVERY (INTERVAL '1 day'));
+CREATE TABLE t2 (id varchar(32)) DISTRIBUTED BY (id);
+analyze t1;
+analyze t2;
+\d+ t1;
+\d+ t2;
+EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1_1_prt_1 JOIN t2 USING(id);
+EXPLAIN(COSTS OFF) SELECT COUNT(*) FROM t1 JOIN t2 USING(id);
+ABORT;