Skip to content

Commit 30f8fd3

Browse files
committed
unnest subquery before RewriteCteChildren
1 parent aa2eb58 commit 30f8fd3

File tree

2 files changed

+189
-121
lines changed

2 files changed

+189
-121
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java

Lines changed: 126 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -419,67 +419,69 @@ public class Rewriter extends AbstractBatchJobExecutor {
419419
)
420420
);
421421

422+
private static final List<RewriteJob> NORMALIZE_PLAN_JOBS = jobs(
423+
topic("Plan Normalization",
424+
custom(RuleType.FOLD_CONSTANT_FOR_SQL_CACHE, FoldConstantForSqlCache::new),
425+
// move MergeProjects rule from analyze phase
426+
// because SubqueryToApply and BindSink rule may create extra project node
427+
// we need merge them at the beginning of rewrite phase to let later rules happy
428+
topDown(new MergeProjectable()),
429+
topDown(
430+
new EliminateOrderByConstant(),
431+
new EliminateSortUnderSubqueryOrView(),
432+
// MergeProjects depends on this rule
433+
new LogicalSubQueryAliasToLogicalProject(),
434+
// TODO: we should do expression normalization after plan normalization
435+
// because some rewritten depends on sub expression tree matching
436+
// such as group by key matching and replaced
437+
// but we need to do some normalization before subquery unnesting,
438+
// such as extract common expression.
439+
ExpressionNormalizationAndOptimization.FULL_RULE_INSTANCE,
440+
new AvgDistinctToSumDivCount(),
441+
new CountDistinctRewrite(),
442+
new ExtractFilterFromCrossJoin()
443+
),
444+
topDown(
445+
// ExtractSingleTableExpressionFromDisjunction conflict to InPredicateToEqualToRule
446+
// in the ExpressionNormalization, so must invoke in another job, otherwise dead loop.
447+
new ExtractSingleTableExpressionFromDisjunction()
448+
)
449+
),
450+
// subquery unnesting relay on ExpressionNormalization to extract common factor expression
451+
topic("Subquery unnesting",
452+
cascadesContext -> cascadesContext.rewritePlanContainsTypes(LogicalApply.class),
453+
// after doing NormalizeAggregate in analysis job
454+
// we need run the following 2 rules to make AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION work
455+
bottomUp(new PullUpProjectUnderApply()),
456+
topDown(
457+
new PushDownFilterThroughProject(),
458+
// the subquery may have where and having clause
459+
// so there may be two filters we need to merge them
460+
new MergeFilters()
461+
),
462+
// query rewrite support window, so add this rule here
463+
custom(RuleType.AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION, AggScalarSubQueryToWindowFunction::new),
464+
bottomUp(
465+
new EliminateUselessPlanUnderApply(),
466+
// CorrelateApplyToUnCorrelateApply and ApplyToJoin
467+
// and SelectMaterializedIndexWithAggregate depends on this rule
468+
new MergeProjectable(),
469+
/*
470+
* Subquery unnesting.
471+
* 1. Adjust the plan in correlated logicalApply
472+
* so that there are no correlated columns in the subquery.
473+
* 2. Convert logicalApply to a logicalJoin.
474+
* TODO: group these rules to make sure the result plan is what we expected.
475+
*/
476+
new CorrelateApplyToUnCorrelateApply(),
477+
new ApplyToJoin()
478+
)
479+
)
480+
);
481+
422482
private static final List<RewriteJob> CTE_CHILDREN_REWRITE_JOBS_BEFORE_SUB_PATH_PUSH_DOWN = notTraverseChildrenOf(
423483
ImmutableSet.of(LogicalCTEAnchor.class),
424484
() -> jobs(
425-
topic("Plan Normalization",
426-
custom(RuleType.FOLD_CONSTANT_FOR_SQL_CACHE, FoldConstantForSqlCache::new),
427-
// move MergeProjects rule from analyze phase
428-
// because SubqueryToApply and BindSink rule may create extra project node
429-
// we need merge them at the beginning of rewrite phase to let later rules happy
430-
topDown(new MergeProjectable()),
431-
topDown(
432-
new EliminateOrderByConstant(),
433-
new EliminateSortUnderSubqueryOrView(),
434-
// MergeProjects depends on this rule
435-
new LogicalSubQueryAliasToLogicalProject(),
436-
// TODO: we should do expression normalization after plan normalization
437-
// because some rewritten depends on sub expression tree matching
438-
// such as group by key matching and replaced
439-
// but we need to do some normalization before subquery unnesting,
440-
// such as extract common expression.
441-
ExpressionNormalizationAndOptimization.FULL_RULE_INSTANCE,
442-
new AvgDistinctToSumDivCount(),
443-
new CountDistinctRewrite(),
444-
new ExtractFilterFromCrossJoin()
445-
),
446-
topDown(
447-
// ExtractSingleTableExpressionFromDisjunction conflict to InPredicateToEqualToRule
448-
// in the ExpressionNormalization, so must invoke in another job, otherwise dead loop.
449-
new ExtractSingleTableExpressionFromDisjunction()
450-
)
451-
),
452-
// subquery unnesting relay on ExpressionNormalization to extract common factor expression
453-
topic("Subquery unnesting",
454-
cascadesContext -> cascadesContext.rewritePlanContainsTypes(LogicalApply.class),
455-
// after doing NormalizeAggregate in analysis job
456-
// we need run the following 2 rules to make AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION work
457-
bottomUp(new PullUpProjectUnderApply()),
458-
topDown(
459-
new PushDownFilterThroughProject(),
460-
// the subquery may have where and having clause
461-
// so there may be two filters we need to merge them
462-
new MergeFilters()
463-
),
464-
// query rewrite support window, so add this rule here
465-
custom(RuleType.AGG_SCALAR_SUBQUERY_TO_WINDOW_FUNCTION, AggScalarSubQueryToWindowFunction::new),
466-
bottomUp(
467-
new EliminateUselessPlanUnderApply(),
468-
// CorrelateApplyToUnCorrelateApply and ApplyToJoin
469-
// and SelectMaterializedIndexWithAggregate depends on this rule
470-
new MergeProjectable(),
471-
/*
472-
* Subquery unnesting.
473-
* 1. Adjust the plan in correlated logicalApply
474-
* so that there are no correlated columns in the subquery.
475-
* 2. Convert logicalApply to a logicalJoin.
476-
* TODO: group these rules to make sure the result plan is what we expected.
477-
*/
478-
new CorrelateApplyToUnCorrelateApply(),
479-
new ApplyToJoin()
480-
)
481-
),
482-
483485
// before `Subquery unnesting` topic, some correlate slots should have appeared at LogicalApply.left,
484486
// but it appeared at LogicalApply.right. After the `Subquery unnesting` topic, all slots is placed in a
485487
// normal position, then we can check column privileges by these steps
@@ -872,75 +874,78 @@ private static List<RewriteJob> getWholeTreeRewriteJobs(
872874
List<RewriteJob> beforePushDownJobs,
873875
List<RewriteJob> afterPushDownJobs,
874876
boolean runCboRules) {
877+
ImmutableList.Builder<RewriteJob> builder = ImmutableList.builder();
878+
builder.addAll(NORMALIZE_PLAN_JOBS);
879+
builder.addAll(notTraverseChildrenOf(
880+
ImmutableSet.of(LogicalCTEAnchor.class),
881+
() -> {
882+
List<RewriteJob> rewriteJobs = Lists.newArrayListWithExpectedSize(300);
875883

876-
return notTraverseChildrenOf(
877-
ImmutableSet.of(LogicalCTEAnchor.class),
878-
() -> {
879-
List<RewriteJob> rewriteJobs = Lists.newArrayListWithExpectedSize(300);
884+
rewriteJobs.addAll(jobs(
885+
topic("cte inline and pull up all cte anchor",
886+
custom(RuleType.PULL_UP_CTE_ANCHOR, PullUpCteAnchor::new),
887+
custom(RuleType.CTE_INLINE, CTEInline::new)
888+
),
889+
topic("process limit session variables",
890+
custom(RuleType.ADD_DEFAULT_LIMIT, AddDefaultLimit::new)
891+
),
892+
topic("record query tmp plan for mv pre rewrite",
893+
custom(RuleType.RECORD_PLAN_FOR_MV_PRE_REWRITE, RecordPlanForMvPreRewrite::new)
894+
),
895+
topic("rewrite cte sub-tree before sub path push down",
896+
custom(RuleType.REWRITE_CTE_CHILDREN,
897+
() -> new RewriteCteChildren(beforePushDownJobs, runCboRules)
898+
)
899+
)));
900+
rewriteJobs.addAll(jobs(topic("convert outer join to anti",
901+
custom(RuleType.CONVERT_OUTER_JOIN_TO_ANTI, ConvertOuterJoinToAntiJoin::new))));
902+
rewriteJobs.addAll(jobs(topic("eliminate group by key by uniform",
903+
custom(RuleType.ELIMINATE_GROUP_BY_KEY_BY_UNIFORM, EliminateGroupByKeyByUniform::new))));
904+
if (needOrExpansion) {
905+
rewriteJobs.addAll(jobs(topic("or expansion",
906+
custom(RuleType.OR_EXPANSION, () -> OrExpansion.INSTANCE))));
907+
}
908+
909+
rewriteJobs.addAll(jobs(topic("split multi distinct",
910+
custom(RuleType.DISTINCT_AGG_STRATEGY_SELECTOR,
911+
() -> DistinctAggStrategySelector.INSTANCE))));
912+
913+
// Rewrite search function before VariantSubPathPruning
914+
// so that ElementAt expressions from search can be processed
915+
rewriteJobs.addAll(jobs(
916+
bottomUp(new RewriteSearchToSlots())
917+
));
880918

881-
rewriteJobs.addAll(jobs(
882-
topic("cte inline and pull up all cte anchor",
883-
custom(RuleType.PULL_UP_CTE_ANCHOR, PullUpCteAnchor::new),
884-
custom(RuleType.CTE_INLINE, CTEInline::new)
885-
),
886-
topic("process limit session variables",
887-
custom(RuleType.ADD_DEFAULT_LIMIT, AddDefaultLimit::new)
888-
),
889-
topic("record query tmp plan for mv pre rewrite",
890-
custom(RuleType.RECORD_PLAN_FOR_MV_PRE_REWRITE, RecordPlanForMvPreRewrite::new)
891-
),
892-
topic("rewrite cte sub-tree before sub path push down",
893-
custom(RuleType.REWRITE_CTE_CHILDREN,
894-
() -> new RewriteCteChildren(beforePushDownJobs, runCboRules)
919+
if (needSubPathPushDown) {
920+
rewriteJobs.addAll(jobs(
921+
topic("variant element_at push down",
922+
custom(RuleType.VARIANT_SUB_PATH_PRUNING, VariantSubPathPruning::new)
895923
)
896-
)));
897-
rewriteJobs.addAll(jobs(topic("convert outer join to anti",
898-
custom(RuleType.CONVERT_OUTER_JOIN_TO_ANTI, ConvertOuterJoinToAntiJoin::new))));
899-
rewriteJobs.addAll(jobs(topic("eliminate group by key by uniform",
900-
custom(RuleType.ELIMINATE_GROUP_BY_KEY_BY_UNIFORM, EliminateGroupByKeyByUniform::new))));
901-
if (needOrExpansion) {
902-
rewriteJobs.addAll(jobs(topic("or expansion",
903-
custom(RuleType.OR_EXPANSION, () -> OrExpansion.INSTANCE))));
904-
}
905-
906-
rewriteJobs.addAll(jobs(topic("split multi distinct",
907-
custom(RuleType.DISTINCT_AGG_STRATEGY_SELECTOR, () -> DistinctAggStrategySelector.INSTANCE))));
908-
909-
// Rewrite search function before VariantSubPathPruning
910-
// so that ElementAt expressions from search can be processed
911-
rewriteJobs.addAll(jobs(
912-
bottomUp(new RewriteSearchToSlots())
913-
));
914-
915-
if (needSubPathPushDown) {
916-
rewriteJobs.addAll(jobs(
917-
topic("variant element_at push down",
918-
custom(RuleType.VARIANT_SUB_PATH_PRUNING, VariantSubPathPruning::new)
924+
));
925+
}
926+
rewriteJobs.add(
927+
topic("nested column prune",
928+
custom(RuleType.NESTED_COLUMN_PRUNING, NestedColumnPruning::new)
919929
)
930+
);
931+
rewriteJobs.addAll(jobs(
932+
topic("rewrite cte sub-tree after sub path push down",
933+
custom(RuleType.CLEAR_CONTEXT_STATUS, ClearContextStatus::new),
934+
custom(RuleType.REWRITE_CTE_CHILDREN,
935+
() -> new RewriteCteChildren(afterPushDownJobs, runCboRules)
936+
)
937+
),
938+
topic("whole plan check",
939+
custom(RuleType.ADJUST_NULLABLE, () -> new AdjustNullable(false))
940+
),
941+
// NullableDependentExpressionRewrite need to be done after nullable fixed
942+
topic("condition function", bottomUp(ImmutableList.of(
943+
new NullableDependentExpressionRewrite())))
920944
));
945+
return rewriteJobs;
921946
}
922-
rewriteJobs.add(
923-
topic("nested column prune",
924-
custom(RuleType.NESTED_COLUMN_PRUNING, NestedColumnPruning::new)
925-
)
926-
);
927-
rewriteJobs.addAll(jobs(
928-
topic("rewrite cte sub-tree after sub path push down",
929-
custom(RuleType.CLEAR_CONTEXT_STATUS, ClearContextStatus::new),
930-
custom(RuleType.REWRITE_CTE_CHILDREN,
931-
() -> new RewriteCteChildren(afterPushDownJobs, runCboRules)
932-
)
933-
),
934-
topic("whole plan check",
935-
custom(RuleType.ADJUST_NULLABLE, () -> new AdjustNullable(false))
936-
),
937-
// NullableDependentExpressionRewrite need to be done after nullable fixed
938-
topic("condition function", bottomUp(ImmutableList.of(
939-
new NullableDependentExpressionRewrite())))
940-
));
941-
return rewriteJobs;
942-
}
943-
);
947+
));
948+
return builder.build();
944949
}
945950

946951
@Override
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
suite("subquery_in_cte") {
18+
sql """
19+
create table t1(a1 int,b1 int)
20+
properties("replication_num" = "1");
21+
22+
insert into t1 values(1,2);
23+
24+
create table t2(a2 int,b2 int)
25+
properties("replication_num" = "1");
26+
27+
insert into t2 values(1,3);
28+
"""
29+
30+
sql"""
31+
with cte1 as (
32+
select t1.a1, t1.b1
33+
from t1
34+
where t1.a1 > 0 and exists (select 1 from t2 where t1.a1 = t2.a2 or t1.a1 = t2.b2)
35+
)
36+
select * from cte1 union all select * from cte1;
37+
"""
38+
39+
/* TEST PURPOSE: APPLY_TO_JOIN should be applied on CTE producer before REWRITE_CTE_CHILDREN,
40+
considering the following analyzed plan, StatsDerive reports NPE when visiting LogicalFilter[26],
41+
a1#1 is not from its child's outputs.
42+
----------------------------------------------------------------------------------------------------------------
43+
Explain String(Nereids Planner)
44+
LogicalResultSink[74] ( outputExprs=[a1#9, b1#10] )
45+
+--LogicalCteAnchor[73] ( cteId=CTEId#0 )
46+
|--LogicalCteProducer[63] ( cteId=CTEId#0 )
47+
| +--LogicalSubQueryAlias ( qualifier=[cte1] )
48+
| +--LogicalProject[36] ( distinct=false, projects=[a1#1, b1#2] )
49+
| +--LogicalProject[35] ( distinct=false, projects=[a1#1, b1#2] )
50+
| +--LogicalFilter[34] ( predicates=(a1#1 > 0) )
51+
| +--LogicalProject[33] ( distinct=false, projects=[a1#1, b1#2] )
52+
| +--LogicalApply ( correlationSlot=[a1#1], correlationFilter=Optional.empty, isMarkJoin=false, isMarkJoinSlotNotNull=false, MarkJoinSlotReference=empty )
53+
| |--LogicalOlapScan ( qualified=internal.test.t1, indexName=<index_not_selected>, selectedIndexId=1767221999857, preAgg=UNSET, operativeCol=[], virtualColumns=[] )
54+
| +--LogicalProject[27] ( distinct=false, projects=[1 AS `1`#0] )
55+
| +--LogicalFilter[26] ( predicates=OR[(a1#1 = a2#3),(a1#1 = b2#4)] )
56+
| +--LogicalOlapScan ( qualified=internal.test.t2, indexName=<index_not_selected>, selectedIndexId=1767221999881, preAgg=UNSET, operativeCol=[], virtualColumns=[] )
57+
+--LogicalUnion ( qualifier=ALL, outputs=[a1#9, b1#10], regularChildrenOutputs=[[a1#5, b1#6], [a1#7, b1#8]], constantExprsList=[], hasPushedFilter=false )
58+
|--LogicalProject[65] ( distinct=false, projects=[a1#5, b1#6] )
59+
| +--LogicalCteConsumer[64] ( cteId=CTEId#0, relationId=RelationId#0, name=cte1 )
60+
+--LogicalProject[67] ( distinct=false, projects=[a1#7, b1#8] )
61+
+--LogicalCteConsumer[66] ( cteId=CTEId#0, relationId=RelationId#1, name=cte1 )
62+
*/
63+
}

0 commit comments

Comments
 (0)