Skip to content

Commit d073125

Browse files
authored
HIVE-29217: Add configuration to choose materialization strategy for CTEs (#6092)
1 parent 935c7d9 commit d073125

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1225
-1062
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hive.conf;
19+
20+
/**
21+
* Type of suggester used for common table expression (CTE) detection and materialization.
22+
*/
23+
public enum CteSuggesterType {
24+
/**
25+
* Materialization is based on the AST/SQL structure of the query. The suggester only works when the
26+
* query explicitly defines CTEs using WITH clauses. The suggester applies early during the syntactic analysis phase
27+
* of the query and materializes WITH clauses into tables using heuristics and configured thresholds.
28+
*/
29+
AST,
30+
/**
31+
* Materialization is based on the algebraic structure of the query. The suggester applies during the cost-based
32+
* optimization phase and the exact behavior can be configured via
33+
* {@link org.apache.hadoop.hive.conf.HiveConf.ConfVars#HIVE_CTE_SUGGESTER_CLASS} property.
34+
*/
35+
CBO,
36+
/**
37+
* Materialization is disabled.
38+
*/
39+
NONE;
40+
41+
public boolean enabled(HiveConf conf) {
42+
return this.name().equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE));
43+
}
44+
}

common/src/java/org/apache/hadoop/hive/conf/HiveConf.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2654,8 +2654,15 @@ public static enum ConfVars {
26542654

26552655
// CTE
26562656
@InterfaceStability.Unstable
2657-
HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class", "",
2658-
"Class for finding and suggesting common table expressions (CTEs) based on a given query. The class must implement the CommonTableExpressionSuggester interface."),
2657+
HIVE_CTE_SUGGESTER_TYPE("hive.optimize.cte.suggester.type", "AST", new StringSet("AST", "CBO", "NONE"),
2658+
"The type of the suggester that is used for finding and materializing common table expressions " +
2659+
"(CTEs) based on a given query."),
2660+
@InterfaceStability.Unstable
2661+
HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class",
2662+
"org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester",
2663+
"The class implementing the common table expression (CTE) suggester logic. This configuration is " +
2664+
"only relevant for the CBO suggester. The class must implement the CommonTableExpressionSuggester " +
2665+
"interface."),
26592666
HIVE_CTE_MATERIALIZE_THRESHOLD("hive.optimize.cte.materialize.threshold", 3,
26602667
"If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" +
26612668
"before executing the main query block. -1 will disable this feature."),

common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.List;
2626

2727
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars;
28+
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE;
2829
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_FORMATTER;
2930
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_RESOLVER_STYLE;
3031
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_EXPLAIN_NODE_VISIT_LIMIT;
@@ -71,6 +72,13 @@ public static Collection<Object[]> generateParameters() {
7172
list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "lenient", null});
7273
list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "OTHER", "Invalid value.. expects one of [smart, strict, " +
7374
"lenient]" });
75+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "AST", null});
76+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "ast", null});
77+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "CBO", null});
78+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "cbo", null});
79+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "NONE", null});
80+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "nOnE", null});
81+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "OTHER", "Invalid value.. expects one of [ast, cbo, none]"});
7482
return list;
7583
}
7684

data/conf/perf/tpcds30tb/cte/hive-site.xml

Lines changed: 0 additions & 50 deletions
This file was deleted.

data/conf/perf/tpcds30tb/cte/tez-site.xml

Lines changed: 0 additions & 20 deletions
This file was deleted.

itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTPCDSCteCliDriver.java

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.junit.runner.RunWith;
2727
import org.junit.runners.Parameterized;
2828
import org.junit.runners.Parameterized.Parameters;
29-
import org.junit.runners.model.Statement;
3029

3130
import java.io.File;
3231
import java.util.List;
@@ -44,27 +43,8 @@ public static List<Object[]> getParameters() throws Exception {
4443
@ClassRule
4544
public static TestRule cliClassRule = adapter.buildClassRule();
4645

47-
/**
48-
* Rule for calling only {@link CliAdapter#setUp()} and {@link CliAdapter#tearDown()} before/after running each test.
49-
*
50-
* At the moment of writing this class the rule is mostly necessary for calling {@link CliAdapter#tearDown()} to avoid
51-
* state leaking from one test to another (e.g., disabling one test should not disable subsequent ones).
52-
*
53-
* {@link CliAdapter#buildTestRule()} cannot be used since it is doing more than necessary for this test case. For
54-
* instance, we do not want to create and destroy the metastore after each query.
55-
*/
5646
@Rule
57-
public TestRule cliTestRule = (statement, description) -> new Statement() {
58-
@Override
59-
public void evaluate() throws Throwable {
60-
adapter.setUp();
61-
try {
62-
statement.evaluate();
63-
} finally {
64-
adapter.tearDown();
65-
}
66-
}
67-
};
47+
public TestRule cliTestRule = adapter.buildTestRule();
6848

6949
private final String name;
7050
private final File qfile;

itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,15 @@ public TPCDSCteCliConfig() {
347347
setQueryDir("ql/src/test/queries/clientpositive/perf");
348348
setLogDir("itests/qtest/target/qfile-results/clientpositive/perf/tpcds30tb/cte");
349349
setResultsDir("ql/src/test/results/clientpositive/perf/tpcds30tb/cte");
350-
setHiveConfDir("data/conf/perf/tpcds30tb/cte");
350+
setHiveConfDir("data/conf/perf/tpcds30tb/tez");
351+
Map<HiveConf.ConfVars, String> conf = new EnumMap<>(HiveConf.ConfVars.class);
352+
conf.put(HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE, "CBO");
353+
conf.put(
354+
HiveConf.ConfVars.HIVE_CTE_SUGGESTER_CLASS,
355+
"org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionPrintSuggester");
356+
conf.put(HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD, "1");
357+
conf.put(HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_FULL_AGGREGATE_ONLY, "false");
358+
setCustomConfigValueMap(conf);
351359
setClusterType(MiniClusterType.LLAP_LOCAL);
352360
setMetastoreType("postgres.tpcds");
353361
// At the moment only makes sense to check CBO plans

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CommonTableExpressionPrintSuggester.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
2323
import org.apache.hadoop.hive.ql.session.SessionState;
2424

25+
import java.util.Comparator;
2526
import java.util.List;
2627

2728
/**
@@ -35,9 +36,9 @@ public class CommonTableExpressionPrintSuggester implements CommonTableExpressio
3536
@Override
3637
public List<RelNode> suggest(final RelNode input, final Configuration configuration) {
3738
List<RelNode> result = internal.suggest(input, configuration);
38-
// Ensure CTEs are printed deterministically to avoid test flakiness
39-
result.stream().map(RelOptUtil::toString).sorted()
40-
.forEach(cte -> SessionState.getConsole().printInfo("CTE Suggestion:\n" + cte, false));
39+
// Ensure CTEs are printed and returned deterministically to avoid test flakiness
40+
result.sort(Comparator.comparing(RelOptUtil::toString));
41+
result.forEach(cte -> SessionState.getConsole().printInfo("CTE Suggestion:\n" + RelOptUtil.toString(cte), false));
4142
return result;
4243
}
4344

ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
import org.apache.hadoop.fs.Path;
136136
import org.apache.hadoop.hive.common.TableName;
137137
import org.apache.hadoop.hive.conf.Constants;
138+
import org.apache.hadoop.hive.conf.CteSuggesterType;
138139
import org.apache.hadoop.hive.conf.HiveConf;
139140
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
140141
import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
@@ -636,7 +637,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
636637
// unfortunately making prunedPartitions immutable is not possible
637638
// here with SemiJoins not all tables are costed in CBO, so their
638639
// PartitionList is not evaluated until the run phase.
639-
getMetaData(getQB(), true);
640+
getMetaData(getQB(), CteSuggesterType.CBO.enabled(conf));
640641

641642
disableJoinMerge = defaultJoinMerge;
642643
sinkOp = genPlan(getQB());
@@ -1727,7 +1728,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
17271728
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.POSTJOIN_ORDERING);
17281729
// Perform the CTE rewriting near the end of CBO transformations to avoid interference of the new HiveTableSpool
17291730
// operator with other rules (especially those related to constant folding and branch pruning).
1730-
if (!forViewCreation) {
1731+
if (!forViewCreation && CteSuggesterType.CBO.enabled(conf)) {
17311732
calcitePlan = applyCteRewriting(planner, calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
17321733
if (LOG.isDebugEnabled()) {
17331734
LOG.debug("Plan after CTE rewriting:\n{}", RelOptUtil.toString(calcitePlan));

ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
9292
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
9393
import org.apache.hadoop.hive.conf.Constants;
94+
import org.apache.hadoop.hive.conf.CteSuggesterType;
9495
import org.apache.hadoop.hive.conf.HiveConf;
9596
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
9697
import org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat;
@@ -13064,7 +13065,7 @@ protected boolean analyzeAndResolveChildTree(ASTNode child, PlannerContext plann
1306413065

1306513066
// Resolve Parse Tree and Get Metadata
1306613067
// Materialization is allowed if it is not a view definition
13067-
getMetaData(qb, createVwDesc == null && !forViewCreation);
13068+
getMetaData(qb, createVwDesc == null && !forViewCreation && CteSuggesterType.AST.enabled(conf));
1306813069
LOG.info("Completed getting MetaData in Semantic Analysis");
1306913070

1307013071
return true;

0 commit comments

Comments
 (0)