Skip to content

Commit a7da724

Browse files
committed
HIVE-29217: Add configuration to choose materialization strategy for CTEs
1 parent d46d900 commit a7da724

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1129
-1053
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hive.conf;
19+
20+
/**
21+
* Type of suggester used for common table expression (CTE) detection and materialization.
22+
*/
23+
public enum CteSuggesterType {
24+
/**
25+
* Materialization is based on the AST/SQL structure of the query. The suggester only works when the
26+
* query explicitly defines CTEs using WITH clauses. The suggester applies early during the syntactic analysis phase
27+
* of the query and materializes WITH clauses into tables using heuristics and configured thresholds.
28+
*/
29+
AST,
30+
/**
31+
* Materialization is based on the algebraic structure of the query. The suggester applies during the cost-based
32+
* optimization phase and the exact behavior can be configured via
33+
* {@link org.apache.hadoop.hive.conf.HiveConf.ConfVars#HIVE_CTE_SUGGESTER_CLASS} property.
34+
*/
35+
CBO,
36+
/**
37+
* Materialization is disabled.
38+
*/
39+
NONE;
40+
41+
/**
* Checks whether this suggester type is the one selected in the given configuration.
*
* The name of this constant is compared, ignoring case, with the value of
* {@link org.apache.hadoop.hive.conf.HiveConf.ConfVars#HIVE_CTE_SUGGESTER_TYPE}.
*
* @param conf the configuration from which the suggester type is read
* @return {@code true} if the configured value matches the name of this constant, {@code false} otherwise
*/
public boolean enabled(HiveConf conf) {
42+
return this.name().equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE));
43+
}
44+
}

common/src/java/org/apache/hadoop/hive/conf/HiveConf.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2767,8 +2767,15 @@ public static enum ConfVars {
27672767

27682768
// CTE
27692769
@InterfaceStability.Unstable
2770-
HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class", "",
2771-
"Class for finding and suggesting common table expressions (CTEs) based on a given query. The class must implement the CommonTableExpressionSuggester interface."),
2770+
HIVE_CTE_SUGGESTER_TYPE("hive.optimize.cte.suggester.type", "AST", new StringSet("AST", "CBO", "NONE"),
2771+
"The type of the suggester that is used for finding and materializing common table expressions " +
2772+
"(CTEs) based on a given query."),
2773+
@InterfaceStability.Unstable
2774+
HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class",
2775+
"org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester",
2776+
"The class implementing the common table expression (CTE) suggester logic. This configuration is " +
2777+
"only relevant for the CBO suggester. The class must implement the CommonTableExpressionSuggester " +
2778+
"interface."),
27722779
HIVE_CTE_MATERIALIZE_THRESHOLD("hive.optimize.cte.materialize.threshold", 3,
27732780
"If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" +
27742781
"before executing the main query block. -1 will disable this feature."),

common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.List;
2626

2727
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars;
28+
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE;
2829
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_FORMATTER;
2930
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_RESOLVER_STYLE;
3031
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_EXPLAIN_NODE_VISIT_LIMIT;
@@ -71,6 +72,13 @@ public static Collection<Object[]> generateParameters() {
7172
list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "lenient", null});
7273
list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "OTHER", "Invalid value.. expects one of [smart, strict, " +
7374
"lenient]" });
75+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "AST", null});
76+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "ast", null});
77+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "CBO", null});
78+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "cbo", null});
79+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "NONE", null});
80+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "nOnE", null});
81+
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "OTHER", "Invalid value.. expects one of [ast, cbo, none]"});
7482
return list;
7583
}
7684

data/conf/perf/tpcds30tb/cte/hive-site.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,20 @@
4343
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
4444
</property>
4545
<!-- Properties for configuring the CTE behavior -->
46+
<property>
47+
<name>hive.optimize.cte.suggester.type</name>
48+
<value>CBO</value>
49+
</property>
4650
<property>
4751
<name>hive.optimize.cte.suggester.class</name>
4852
<value>org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionPrintSuggester</value>
4953
</property>
54+
<property>
55+
<name>hive.optimize.cte.materialize.threshold</name>
56+
<value>1</value>
57+
</property>
58+
<property>
59+
<name>hive.optimize.cte.materialize.full.aggregate.only</name>
60+
<value>false</value>
61+
</property>
5062
</configuration>

itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTPCDSCteCliDriver.java

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.junit.runner.RunWith;
2727
import org.junit.runners.Parameterized;
2828
import org.junit.runners.Parameterized.Parameters;
29-
import org.junit.runners.model.Statement;
3029

3130
import java.io.File;
3231
import java.util.List;
@@ -44,27 +43,8 @@ public static List<Object[]> getParameters() throws Exception {
4443
@ClassRule
4544
public static TestRule cliClassRule = adapter.buildClassRule();
4645

47-
/**
48-
* Rule for calling only {@link CliAdapter#setUp()} and {@link CliAdapter#tearDown()} before/after running each test.
49-
*
50-
* At the moment of writing this class the rule is mostly necessary for calling {@link CliAdapter#tearDown()} to avoid
51-
* state from one test pass to other (e.g., disabling one test should not disable subsequent ones).
52-
*
53-
* {@link CliAdapter#buildTestRule()} cannot be used since it is doing more than necessary for this test case. For
54-
* instance, we do not want to create and destroy the metastore after each query.
55-
*/
5646
@Rule
57-
public TestRule cliTestRule = (statement, description) -> new Statement() {
58-
@Override
59-
public void evaluate() throws Throwable {
60-
adapter.setUp();
61-
try {
62-
statement.evaluate();
63-
} finally {
64-
adapter.tearDown();
65-
}
66-
}
67-
};
47+
public TestRule cliTestRule = adapter.buildTestRule();
6848

6949
private final String name;
7050
private final File qfile;

itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
import java.net.URL;
2323
import java.util.HashMap;
2424
import java.util.Map;
25+
import java.util.Set;
2526

27+
import com.google.common.collect.ImmutableSet;
2628
import org.apache.hadoop.hive.conf.HiveConf;
2729
import org.apache.hadoop.hive.ql.QTestMiniClusters;
2830
import org.apache.hadoop.hive.ql.QTestMiniClusters.MiniClusterType;
@@ -349,7 +351,11 @@ public TPCDSCteCliConfig() {
349351
setClusterType(MiniClusterType.LLAP_LOCAL);
350352
setMetastoreType("postgres.tpcds");
351353
// At the moment only makes sense to check CBO plans
354+
Set<Integer> skipQueries = ImmutableSet.of(64); // Skipped due to HIVE-29249
352355
for (int i = 1; i < 100; i++) {
356+
if (skipQueries.contains(i)) {
357+
continue;
358+
}
353359
includeQuery("cbo_query" + i + ".q");
354360
}
355361
}

ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
import org.apache.hadoop.fs.Path;
136136
import org.apache.hadoop.hive.common.TableName;
137137
import org.apache.hadoop.hive.conf.Constants;
138+
import org.apache.hadoop.hive.conf.CteSuggesterType;
138139
import org.apache.hadoop.hive.conf.HiveConf;
139140
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
140141
import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
@@ -635,7 +636,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
635636
// unfortunately making prunedPartitions immutable is not possible
636637
// here with SemiJoins not all tables are costed in CBO, so their
637638
// PartitionList is not evaluated until the run phase.
638-
getMetaData(getQB(), true);
639+
getMetaData(getQB(), CteSuggesterType.CBO.enabled(conf));
639640

640641
disableJoinMerge = defaultJoinMerge;
641642
sinkOp = genPlan(getQB());
@@ -1743,7 +1744,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
17431744
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.POSTJOIN_ORDERING);
17441745
// Perform the CTE rewriting near the end of CBO transformations to avoid interference of the new HiveTableSpool
17451746
// operator with other rules (especially those related to constant folding and branch pruning).
1746-
if (!forViewCreation) {
1747+
if (!forViewCreation && CteSuggesterType.CBO.enabled(conf)) {
17471748
calcitePlan = applyCteRewriting(planner, calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
17481749
if (LOG.isDebugEnabled()) {
17491750
LOG.debug("Plan after CTE rewriting:\n{}", RelOptUtil.toString(calcitePlan));

ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
9292
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
9393
import org.apache.hadoop.hive.conf.Constants;
94+
import org.apache.hadoop.hive.conf.CteSuggesterType;
9495
import org.apache.hadoop.hive.conf.HiveConf;
9596
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
9697
import org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat;
@@ -13056,7 +13057,7 @@ protected boolean analyzeAndResolveChildTree(ASTNode child, PlannerContext plann
1305613057

1305713058
// Resolve Parse Tree and Get Metadata
1305813059
// Materialization is allowed if it is not a view definition
13059-
getMetaData(qb, createVwDesc == null && !forViewCreation);
13060+
getMetaData(qb, createVwDesc == null && !forViewCreation && CteSuggesterType.AST.enabled(conf));
1306013061
LOG.info("Completed getting MetaData in Semantic Analysis");
1306113062

1306213063
return true;

ql/src/test/queries/clientpositive/cte_cbo_plan_json.q

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ CREATE TABLE emps
77
);
88

99
set hive.optimize.cte.materialize.threshold=1;
10-
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester;
10+
set hive.optimize.cte.suggester.type=CBO;
1111
set hive.optimize.cte.materialize.full.aggregate.only=false;
1212

1313
EXPLAIN FORMATTED CBO

ql/src/test/queries/clientpositive/cte_cbo_rewrite_0.q

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ HAVING AVG(e.salary) < 100000;
5959

6060
set hive.optimize.cte.materialize.threshold=1;
6161
set hive.optimize.cte.materialize.full.aggregate.only=false;
62-
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester;
62+
set hive.optimize.cte.suggester.type=CBO;
6363

6464
SELECT d.name, 'HIGH'
6565
FROM emps e

0 commit comments

Comments
 (0)