Skip to content

Commit cb49dc0

Browse files
committed
HIVE-29249: RuntimeException in PlanModifierForASTConv.introduceDerivedTable for queries with self joins
The problem occurs when we need to introduce a derived table over the left input of the join but the input is already modified (newL) by the shuttle. Looking for the modified input in the join's children fails and raises an exception. Since we know which input needs to be replaced, we can avoid the lookup mechanism in `introduceDerivedTable(RelNode, RelNode)` and directly build the new join operator via copy. To avoid unnecessary object creation bubbling further up via the HiveRelShuttle mechanism, we perform the copy *only* when one of the inputs is modified.
1 parent d46d900 commit cb49dc0

File tree

3 files changed

+41
-10
lines changed

3 files changed

+41
-10
lines changed

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,16 @@ public RelNode visit(HiveJoin join) {
229229
// No self-join detected, return the join as is
230230
aliases.addAll(lf.aliases);
231231
aliases.addAll(rf.aliases);
232+
} else {
233+
// Self-join detected, introduce a derived table for the left side
234+
aliases.addAll(rf.aliases);
235+
newL = introduceDerivedTable(newL);
236+
}
237+
if (newL == join.getLeft() && newR == join.getRight()) {
238+
return join;
239+
} else {
232240
return join.copy(join.getTraitSet(), Arrays.asList(newL, newR));
233241
}
234-
// Self-join detected, introduce a derived table for the left side
235-
aliases.addAll(rf.aliases);
236-
introduceDerivedTable(newL, join);
237-
return join;
238242
}
239243

240244
@Override

ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,9 @@ create table t1 (key int, value int);
33
explain cbo
44
with cte as
55
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
6-
select * from cte a join cte b join cte c
6+
select * from cte a join cte b join cte c;
7+
8+
explain cbo
9+
with cte as
10+
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
11+
select * from cte a join t1 b join cte c;

ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,32 @@ POSTHOOK: Input: default@t1
2525
CBO PLAN:
2626
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$12], value0=[$13], BLOCK__OFFSET__INSIDE__FILE0=[$14], INPUT__FILE__NAME0=[$15], ROW__ID0=[$16], ROW__IS__DELETED0=[$17], key1=[$6], value1=[$7], BLOCK__OFFSET__INSIDE__FILE1=[$8], INPUT__FILE__NAME1=[$9], ROW__ID1=[$10], ROW__IS__DELETED1=[$11])
2727
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
28-
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7], BLOCK__OFFSET__INSIDE__FILE0=[$8], INPUT__FILE__NAME0=[$9], ROW__ID0=[$10], ROW__IS__DELETED0=[$11])
29-
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
30-
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5])
31-
HiveTableScan(table=[[default, t1]], table:alias=[t1])
32-
HiveTableScan(table=[[default, t1]], table:alias=[t1])
28+
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
29+
HiveTableScan(table=[[default, t1]], table:alias=[t1])
30+
HiveTableScan(table=[[default, t1]], table:alias=[t1])
3331
HiveTableScan(table=[[default, t1]], table:alias=[t1])
3432

33+
Warning: Shuffle Join MERGEJOIN[13][tables = [t1, $hdt$_0]] in Stage 'Reducer 2' is a cross product
34+
Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, t1]] in Stage 'Reducer 3' is a cross product
35+
PREHOOK: query: explain cbo
36+
with cte as
37+
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
38+
select * from cte a join t1 b join cte c
39+
PREHOOK: type: QUERY
40+
PREHOOK: Input: default@t1
41+
#### A masked pattern was here ####
42+
POSTHOOK: query: explain cbo
43+
with cte as
44+
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
45+
select * from cte a join t1 b join cte c
46+
POSTHOOK: type: QUERY
47+
POSTHOOK: Input: default@t1
48+
#### A masked pattern was here ####
49+
CBO PLAN:
50+
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
51+
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
52+
HiveTableScan(table=[[default, t1]], table:alias=[t1])
53+
HiveProject(key=[$0], value=[$1])
54+
HiveTableScan(table=[[default, t1]], table:alias=[b])
55+
HiveTableScan(table=[[default, t1]], table:alias=[t1])
56+

0 commit comments

Comments
 (0)