Skip to content

Commit 0faf8e1

Browse files
authored
FIX: re-enable the NL-index in ORCA and fix the Join2IndexApplyGeneric (apache#807)
The CXformJoin2IndexApplyGeneric xform will create the CPhysicalInnerHashJoin in the ROOT path. But when a DynamicGet is in a child node, it doesn't go through any checks, which is incorrect. If the components of the current relationship are inconsistent with the group-by key, the logical transformation will be invalid. After the current logical conversion succeeds, the enforce phase will not be required to process the partial key by default.
1 parent cf204de commit 0faf8e1

26 files changed

+1353
-1509
lines changed

src/backend/gporca/libgpopt/src/operators/CPhysicalInnerIndexNLJoin.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "gpopt/base/CDistributionSpecHashed.h"
1818
#include "gpopt/base/CDistributionSpecNonSingleton.h"
1919
#include "gpopt/base/CDistributionSpecReplicated.h"
20-
#include "gpopt/exception.h"
2120
#include "gpopt/operators/CExpressionHandle.h"
2221
#include "gpopt/operators/CPredicateUtils.h"
2322

@@ -119,13 +118,6 @@ CPhysicalInnerIndexNLJoin::Ped(CMemoryPool *mp, CExpressionHandle &exprhdl,
119118
CEnfdDistribution::EDistributionMatching dmatch =
120119
Edm(prppInput, child_index, pdrgpdpCtxt, ulDistrReq);
121120

122-
// FIXME: nestloop with inner index scan may produce wrong plan, see
123-
// issue https://github.com/cloudberrydb/cloudberrydb/issues/567
124-
// Fallback to postgres optimizer to avoid wrong plan. We should
125-
// fix this issue and remove the following exception.
126-
GPOS_RAISE(gpopt::ExmaGPOPT, gpopt::ExmiUnsupportedOp,
127-
GPOS_WSZ_LIT("Fallback: InnerIndexNestLoopJoin may have wrong plan"));
128-
129121
if (1 == child_index)
130122
{
131123
// inner (index-scan side) is requested for Any distribution,

src/backend/gporca/libgpopt/src/xforms/CXformJoin2IndexApplyGeneric.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,14 @@ CXformJoin2IndexApplyGeneric::Transform(CXformContext *pxfctxt,
283283
CLogicalDynamicGet::PopConvert(pexprCurrInnerChild->Pop());
284284
ptabdescInner = popDynamicGet->Ptabdesc();
285285
distributionCols = popDynamicGet->PcrsDist();
286+
// issue https://github.com/apache/cloudberry/issues/567
287+
// the DynamicGet also needs to check that the grouping key contains the distributionCols
288+
if (nullptr != groupingColsToCheck.Value() &&
289+
!groupingColsToCheck->ContainsAll(distributionCols))
290+
{
291+
// the grouping columns are not a superset of the distribution columns
292+
return;
293+
}
286294
pexprGet = pexprCurrInnerChild;
287295
}
288296
break;

src/test/regress/expected/aggregates_optimizer.out

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,39 +1342,31 @@ explain (costs off) select a,c from t1 group by a,c,d;
13421342
explain (costs off) select *
13431343
from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
13441344
group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
1345-
INFO: GPORCA failed to produce a plan, falling back to planner
1346-
DETAIL: Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
1347-
QUERY PLAN
1348-
------------------------------------------------------------
1345+
QUERY PLAN
1346+
-------------------------------------------------------
13491347
Gather Motion 3:1 (slice1; segments: 3)
1350-
-> HashAggregate
1351-
Group Key: t1.a, t1.b, t2.x, t2.y
1352-
-> Hash Join
1353-
Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
1354-
-> Seq Scan on t2
1355-
-> Hash
1356-
-> Seq Scan on t1
1357-
Optimizer: Postgres query optimizer
1358-
(9 rows)
1348+
-> Nested Loop
1349+
Join Filter: true
1350+
-> Seq Scan on t1
1351+
-> Index Scan using t2_pkey on t2
1352+
Index Cond: ((x = t1.a) AND (y = t1.b))
1353+
Optimizer: Pivotal Optimizer (GPORCA)
1354+
(7 rows)
13591355

13601356
-- Test case where t1 can be optimized but not t2
13611357
explain (costs off) select t1.*,t2.x,t2.z
13621358
from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
13631359
group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
1364-
INFO: GPORCA failed to produce a plan, falling back to planner
1365-
DETAIL: Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
1366-
QUERY PLAN
1367-
------------------------------------------------------------
1360+
QUERY PLAN
1361+
-------------------------------------------------------
13681362
Gather Motion 3:1 (slice1; segments: 3)
1369-
-> HashAggregate
1370-
Group Key: t1.a, t1.b, t2.x, t2.z
1371-
-> Hash Join
1372-
Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
1373-
-> Seq Scan on t2
1374-
-> Hash
1375-
-> Seq Scan on t1
1376-
Optimizer: Postgres query optimizer
1377-
(9 rows)
1363+
-> Nested Loop
1364+
Join Filter: true
1365+
-> Seq Scan on t1
1366+
-> Index Scan using t2_pkey on t2
1367+
Index Cond: ((x = t1.a) AND (y = t1.b))
1368+
Optimizer: Pivotal Optimizer (GPORCA)
1369+
(7 rows)
13781370

13791371
-- Cannot optimize when PK is deferrable
13801372
explain (costs off) select * from t3 group by a,b,c;

src/test/regress/expected/bfv_index_optimizer.out

Lines changed: 90 additions & 118 deletions
Large diffs are not rendered by default.

src/test/regress/expected/bfv_partition_plans.out

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
114114
insert into mpp23195_t1 values (generate_series(1,19));
115115
insert into mpp23195_t2 values (1);
116116
-- TEST
117-
-- Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
118117
select find_operator('select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;', 'Dynamic Index Scan');
119118
find_operator
120119
---------------

src/test/regress/expected/bfv_partition_plans_optimizer.out

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,10 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
114114
insert into mpp23195_t1 values (generate_series(1,19));
115115
insert into mpp23195_t2 values (1);
116116
-- TEST
117-
-- Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
118117
select find_operator('select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;', 'Dynamic Index Scan');
119118
find_operator
120119
---------------
121-
['false']
120+
['true']
122121
(1 row)
123122

124123
select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;

src/test/regress/expected/co_nestloop_idxscan_optimizer.out

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,16 @@ create index foo_id_idx on co_nestloop_idxscan.foo(id);
2727
explain select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
2828
QUERY PLAN
2929
-------------------------------------------------------------------------------------
30-
Gather Motion 3:1 (slice1; segments: 3) (cost=1.02..510.33 rows=6 width=8)
31-
-> Hash Join (cost=1.02..510.25 rows=2 width=8)
32-
Hash Cond: (f.id = b.id)
33-
-> Seq Scan on foo f (cost=0.00..509.17 rows=17 width=8)
34-
-> Hash (cost=1.01..1.01 rows=1 width=8)
35-
-> Seq Scan on bar b (cost=0.00..1.01 rows=1 width=8)
36-
Optimizer: Postgres query optimizer
37-
(7 rows)
30+
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8)
31+
-> Nested Loop (cost=0.00..499.13 rows=1 width=8)
32+
Join Filter: true
33+
-> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8)
34+
-> Bitmap Heap Scan on foo (cost=0.00..68.13 rows=1 width=8)
35+
Recheck Cond: (id = bar.id)
36+
-> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0)
37+
Index Cond: (id = bar.id)
38+
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
39+
(9 rows)
3840

3941
select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
4042
id
@@ -49,15 +51,16 @@ set enable_nestloop=on;
4951
explain select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
5052
QUERY PLAN
5153
-------------------------------------------------------------------------------------
52-
Gather Motion 3:1 (slice1; segments: 3) (cost=8.15..13.26 rows=6 width=8)
53-
-> Nested Loop (cost=8.15..13.18 rows=2 width=8)
54-
-> Seq Scan on bar b (cost=0.00..1.01 rows=1 width=8)
55-
-> Bitmap Heap Scan on foo f (cost=8.15..12.16 rows=1 width=8)
56-
Recheck Cond: (id = b.id)
57-
-> Bitmap Index Scan on foo_id_idx (cost=0.00..8.15 rows=1 width=0)
58-
Index Cond: (id = b.id)
59-
Optimizer: Postgres query optimizer
60-
(8 rows)
54+
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8)
55+
-> Nested Loop (cost=0.00..499.13 rows=1 width=8)
56+
Join Filter: true
57+
-> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8)
58+
-> Bitmap Heap Scan on foo (cost=0.00..68.13 rows=1 width=8)
59+
Recheck Cond: (id = bar.id)
60+
-> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0)
61+
Index Cond: (id = bar.id)
62+
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
63+
(9 rows)
6164

6265
select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
6366
id
@@ -73,15 +76,16 @@ set enable_seqscan = off;
7376
explain select f.id from co_nestloop_idxscan.bar b, co_nestloop_idxscan.foo f where f.id = b.id;
7477
QUERY PLAN
7578
-------------------------------------------------------------------------------------
76-
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000008.15..10000000013.26 rows=6 width=8)
77-
-> Nested Loop (cost=10000000008.15..10000000013.18 rows=2 width=8)
78-
-> Seq Scan on bar b (cost=10000000000.00..10000000001.01 rows=1 width=8)
79-
-> Bitmap Heap Scan on foo f (cost=8.15..12.16 rows=1 width=8)
80-
Recheck Cond: (id = b.id)
81-
-> Bitmap Index Scan on foo_id_idx (cost=0.00..8.15 rows=1 width=0)
82-
Index Cond: (id = b.id)
83-
Optimizer: Postgres query optimizer
84-
(8 rows)
79+
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8)
80+
-> Nested Loop (cost=0.00..499.13 rows=1 width=8)
81+
Join Filter: true
82+
-> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8)
83+
-> Bitmap Heap Scan on foo (cost=0.00..68.13 rows=1 width=8)
84+
Recheck Cond: (id = bar.id)
85+
-> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0)
86+
Index Cond: (id = bar.id)
87+
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
88+
(9 rows)
8589

8690
select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
8791
id

0 commit comments

Comments
 (0)