Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/CteSuggesterType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.conf;

/**
 * Kinds of suggesters available for detecting and materializing common table expressions (CTEs).
 */
public enum CteSuggesterType {
  /**
   * Suggester driven by the AST/SQL structure of the query. It only takes effect when the query explicitly
   * declares CTEs through WITH clauses. It runs early, during the syntactic analysis phase, and materializes
   * WITH clauses into tables using heuristics and configured thresholds.
   */
  AST,
  /**
   * Suggester driven by the algebraic structure of the query. It runs during the cost-based optimization phase
   * and its exact behavior can be configured via the
   * {@link org.apache.hadoop.hive.conf.HiveConf.ConfVars#HIVE_CTE_SUGGESTER_CLASS} property.
   */
  CBO,
  /**
   * No materialization takes place.
   */
  NONE;

  /**
   * Tells whether this suggester type is the one selected in the given configuration.
   *
   * @param conf configuration from which {@link HiveConf.ConfVars#HIVE_CTE_SUGGESTER_TYPE} is read
   * @return {@code true} when the configured value equals the name of this constant, ignoring case
   */
  public boolean enabled(HiveConf conf) {
    final String configuredType = conf.getVar(HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE);
    return name().equalsIgnoreCase(configuredType);
  }
}
11 changes: 9 additions & 2 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -2767,8 +2767,15 @@ public static enum ConfVars {

// CTE
@InterfaceStability.Unstable
HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class", "",
"Class for finding and suggesting common table expressions (CTEs) based on a given query. The class must implement the CommonTableExpressionSuggester interface."),
HIVE_CTE_SUGGESTER_TYPE("hive.optimize.cte.suggester.type", "AST", new StringSet("AST", "CBO", "NONE"),
"The type of the suggester that is used for finding and materializing common table expressions " +
"(CTEs) based on a given query."),
@InterfaceStability.Unstable
HIVE_CTE_SUGGESTER_CLASS("hive.optimize.cte.suggester.class",
"org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester",
"The class implementing the common table expression (CTE) suggester logic. This configuration is " +
"only relevant for the CBO suggester. The class must implement the CommonTableExpressionSuggester " +
"interface."),
HIVE_CTE_MATERIALIZE_THRESHOLD("hive.optimize.cte.materialize.threshold", 3,
"If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" +
"before executing the main query block. -1 will disable this feature."),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.List;

import static org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_CTE_SUGGESTER_TYPE;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_FORMATTER;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_RESOLVER_STYLE;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_EXPLAIN_NODE_VISIT_LIMIT;
Expand Down Expand Up @@ -71,6 +72,13 @@ public static Collection<Object[]> generateParameters() {
list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "lenient", null});
list.add(new Object[] { HIVE_DATETIME_RESOLVER_STYLE, "OTHER", "Invalid value.. expects one of [smart, strict, " +
"lenient]" });
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "AST", null});
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "ast", null});
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "CBO", null});
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "cbo", null});
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "NONE", null});
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "nOnE", null});
list.add(new Object[] {HIVE_CTE_SUGGESTER_TYPE, "OTHER", "Invalid value.. expects one of [ast, cbo, none]"});
return list;
}

Expand Down
12 changes: 12 additions & 0 deletions data/conf/perf/tpcds30tb/cte/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,20 @@
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<!-- Properties for configuring the CTE behavior -->
<property>
<name>hive.optimize.cte.suggester.type</name>
<value>CBO</value>
</property>
<property>
<name>hive.optimize.cte.suggester.class</name>
<value>org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionPrintSuggester</value>
</property>
<property>
<name>hive.optimize.cte.materialize.threshold</name>
<value>1</value>
</property>
<property>
<name>hive.optimize.cte.materialize.full.aggregate.only</name>
<value>false</value>
</property>
</configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.junit.runners.model.Statement;

import java.io.File;
import java.util.List;
Expand All @@ -44,27 +43,8 @@ public static List<Object[]> getParameters() throws Exception {
@ClassRule
public static TestRule cliClassRule = adapter.buildClassRule();

/**
* Rule for calling only {@link CliAdapter#setUp()} and {@link CliAdapter#tearDown()} before/after running each test.
*
* At the moment of writing this class the rule is mostly necessary for calling {@link CliAdapter#tearDown()} to avoid
* leaking state from one test to another (e.g., disabling one test should not disable subsequent ones).
*
* {@link CliAdapter#buildTestRule()} cannot be used since it is doing more than necessary for this test case. For
* instance, we do not want to create and destroy the metastore after each query.
*/
@Rule
public TestRule cliTestRule = (statement, description) -> new Statement() {
@Override
public void evaluate() throws Throwable {
adapter.setUp();
try {
statement.evaluate();
} finally {
adapter.tearDown();
}
}
};
public TestRule cliTestRule = adapter.buildTestRule();

private final String name;
private final File qfile;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.ImmutableSet;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QTestMiniClusters;
import org.apache.hadoop.hive.ql.QTestMiniClusters.MiniClusterType;
Expand Down Expand Up @@ -349,7 +351,11 @@ public TPCDSCteCliConfig() {
setClusterType(MiniClusterType.LLAP_LOCAL);
setMetastoreType("postgres.tpcds");
// At the moment only makes sense to check CBO plans
Set<Integer> skipQueries = ImmutableSet.of(64); // Skipped due to HIVE-29249
for (int i = 1; i < 100; i++) {
if (skipQueries.contains(i)) {
continue;
}
includeQuery("cbo_query" + i + ".q");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.CteSuggesterType;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
Expand Down Expand Up @@ -635,7 +636,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
// unfortunately making prunedPartitions immutable is not possible
// here with SemiJoins not all tables are costed in CBO, so their
// PartitionList is not evaluated until the run phase.
getMetaData(getQB(), true);
getMetaData(getQB(), CteSuggesterType.CBO.enabled(conf));

disableJoinMerge = defaultJoinMerge;
sinkOp = genPlan(getQB());
Expand Down Expand Up @@ -1743,7 +1744,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.POSTJOIN_ORDERING);
// Perform the CTE rewriting near the end of CBO transformations to avoid interference of the new HiveTableSpool
// operator with other rules (especially those related to constant folding and branch pruning).
if (!forViewCreation) {
if (!forViewCreation && CteSuggesterType.CBO.enabled(conf)) {
calcitePlan = applyCteRewriting(planner, calcitePlan, mdProvider.getMetadataProvider(), executorProvider);
if (LOG.isDebugEnabled()) {
LOG.debug("Plan after CTE rewriting:\n{}", RelOptUtil.toString(calcitePlan));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.CteSuggesterType;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat;
Expand Down Expand Up @@ -13056,7 +13057,7 @@ protected boolean analyzeAndResolveChildTree(ASTNode child, PlannerContext plann

// Resolve Parse Tree and Get Metadata
// Materialization is allowed if it is not a view definition
getMetaData(qb, createVwDesc == null && !forViewCreation);
getMetaData(qb, createVwDesc == null && !forViewCreation && CteSuggesterType.AST.enabled(conf));
LOG.info("Completed getting MetaData in Semantic Analysis");

return true;
Expand Down
2 changes: 1 addition & 1 deletion ql/src/test/queries/clientpositive/cte_cbo_plan_json.q
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CREATE TABLE emps
);

set hive.optimize.cte.materialize.threshold=1;
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester;
set hive.optimize.cte.suggester.type=CBO;
set hive.optimize.cte.materialize.full.aggregate.only=false;

EXPLAIN FORMATTED CBO
Expand Down
2 changes: 1 addition & 1 deletion ql/src/test/queries/clientpositive/cte_cbo_rewrite_0.q
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ HAVING AVG(e.salary) < 100000;

set hive.optimize.cte.materialize.threshold=1;
set hive.optimize.cte.materialize.full.aggregate.only=false;
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester;
set hive.optimize.cte.suggester.type=CBO;

SELECT d.name, 'HIGH'
FROM emps e
Expand Down
4 changes: 2 additions & 2 deletions ql/src/test/queries/clientpositive/cte_mat_12.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- Verify that hive.optimize.cte.materialize.full.aggregate.only behaves as expected for implicitly discovered (hive.optimize.cte.suggester.class) CTEs in the query
-- Verify that hive.optimize.cte.materialize.full.aggregate.only behaves as expected for implicitly discovered (hive.optimize.cte.suggester.type=CBO) CTEs in the query
CREATE TABLE emps
(
empid INTEGER,
Expand All @@ -8,7 +8,7 @@ CREATE TABLE emps
);

set hive.optimize.cte.materialize.threshold=1;
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionIdentitySuggester;
set hive.optimize.cte.suggester.type=CBO;

set hive.optimize.cte.materialize.full.aggregate.only=true;

Expand Down
67 changes: 67 additions & 0 deletions ql/src/test/queries/clientpositive/cte_mat_type.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
-- Explain the same query (a WITH clause referenced from both branches of a UNION) under each value of
-- hive.optimize.cte.suggester.type (AST, CBO, NONE) so the resulting CBO plans can be compared.
CREATE TABLE emps
(
empid INTEGER,
deptno INTEGER,
name VARCHAR(10),
salary DECIMAL(8, 2)
);

CREATE TABLE depts
(
deptno INTEGER,
name VARCHAR(20)
);

-- dept_avg is referenced twice in the queries below, which exceeds this threshold.
set hive.optimize.cte.materialize.threshold=1;
set hive.optimize.cte.materialize.full.aggregate.only=false;

-- Case 1: AST suggester type.
set hive.optimize.cte.suggester.type=AST;

EXPLAIN CBO
WITH dept_avg AS (
SELECT d.name AS d_name, AVG(e.salary) AS avg_salary
FROM emps e
INNER JOIN depts d ON e.deptno = d.deptno
GROUP BY d.name
)
SELECT d_name, 'HIGH'
FROM dept_avg da
WHERE da.avg_salary >= 100000
UNION
SELECT d_name, 'LOW'
FROM dept_avg da
WHERE da.avg_salary < 100000;

-- Case 2: CBO suggester type (hive.optimize.cte.suggester.class is not set in this file).
set hive.optimize.cte.suggester.type=CBO;

EXPLAIN CBO
WITH dept_avg AS (
SELECT d.name AS d_name, AVG(e.salary) AS avg_salary
FROM emps e
INNER JOIN depts d ON e.deptno = d.deptno
GROUP BY d.name
)
SELECT d_name, 'HIGH'
FROM dept_avg da
WHERE da.avg_salary >= 100000
UNION
SELECT d_name, 'LOW'
FROM dept_avg da
WHERE da.avg_salary < 100000;

-- Case 3: NONE suggester type.
set hive.optimize.cte.suggester.type=NONE;

EXPLAIN CBO
WITH dept_avg AS (
SELECT d.name AS d_name, AVG(e.salary) AS avg_salary
FROM emps e
INNER JOIN depts d ON e.deptno = d.deptno
GROUP BY d.name
)
SELECT d_name, 'HIGH'
FROM dept_avg da
WHERE da.avg_salary >= 100000
UNION
SELECT d_name, 'LOW'
FROM dept_avg da
WHERE da.avg_salary < 100000;
Loading