From be72faf375ac75a44e487ef560abba01ef05b8f9 Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Fri, 29 May 2026 23:51:45 +0530 Subject: [PATCH 1/3] HIVE-29634: Fix semijoin RHS column registration for multiple columns from same alias --- .../hive/ql/parse/SemanticAnalyzer.java | 5 +++ .../clientpositive/join_common_rhs_alias.q | 8 +++++ .../llap/join_common_rhs_alias.q.out | 35 +++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/join_common_rhs_alias.q create mode 100644 ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index b23871278c51..6db9bfd88225 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -3072,6 +3072,11 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, if (rightAliases.size() > rhssize) { // the new table is rhs table rhsAlias = rightAliases.get(rightAliases.size() - 1); + } else if (condn.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { + String alias = unescapeIdentifier(condn.getChild(0).getChild(0).getText().toLowerCase()); + if (isPresent(joinTree.getRightAliases(), alias)) { + rhsAlias = alias; + } } parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), diff --git a/ql/src/test/queries/clientpositive/join_common_rhs_alias.q b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q new file mode 100644 index 000000000000..b691f6c88039 --- /dev/null +++ b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q @@ -0,0 +1,8 @@ +CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING); + +INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a"); + +SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2 +ON t1.c1 == t2.c1 +AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 ) +WHERE t2.c1 IS NULL; diff --git a/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out new file mode 100644 index 000000000000..1ce5e8286210 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out @@ -0,0 +1,35 @@ +PREHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +PREHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tab +POSTHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tab +POSTHOOK: Lineage: tab.c1 SCRIPT [] +POSTHOOK: Lineage: tab.c2 SCRIPT [] +POSTHOOK: Lineage: tab.c3 SCRIPT [] +PREHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2 +ON t1.c1 == t2.c1 +AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 ) +WHERE t2.c1 IS NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@tab +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2 +ON t1.c1 == t2.c1 +AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 ) +WHERE t2.c1 IS NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tab +#### A masked pattern was here #### +a a aa +b b ba From f9c1c310fd60dd3d9d47a7c7141910aca629887a Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Sun, 31 May 2026 22:25:14 +0530 Subject: [PATCH 2/3] Update output for the failing semijoin6 Qtest --- .../test/results/clientpositive/llap/semijoin6.q.out | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ql/src/test/results/clientpositive/llap/semijoin6.q.out b/ql/src/test/results/clientpositive/llap/semijoin6.q.out index 0c7e9d4f441b..327b12ec0509 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin6.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin6.q.out @@ -777,20 +777,20 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col2 + outputColumnNames: _col0, _col3 Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col2 (type: int), _col2 (type: int) + keys: _col0 (type: int), _col3 (type: int), _col3 (type: int), _col3 (type: int) minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs From e186cdef1a58b4fbd10da7ba62504cc37a2b113d Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Tue, 2 Jun 2026 10:36:13 +0530 Subject: [PATCH 3/3] Refactor approach to use the existing recursive call --- .../hive/ql/parse/SemanticAnalyzer.java | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6db9bfd88225..7781ded7bfa7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -3065,19 +3065,23 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn, && condn.getToken().getType() == HiveParser.DOT) { // get the semijoin rhs table name and field name fields1 = new ArrayList(); - int rhssize = rightAliases.size(); + List scopedLeftAliases = new ArrayList<>(); + List scopedRightAliases = new ArrayList<>(); + parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0), - leftAliases, rightAliases, null, aliasToOpInfo); - String rhsAlias = null; - - if (rightAliases.size() > rhssize) { // the new table is rhs table - rhsAlias = rightAliases.get(rightAliases.size() - 1); - } else if (condn.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { - String alias = unescapeIdentifier(condn.getChild(0).getChild(0).getText().toLowerCase()); - if (isPresent(joinTree.getRightAliases(), alias)) { - rhsAlias = alias; + scopedLeftAliases, scopedRightAliases, null, aliasToOpInfo); + + String rhsAlias = scopedRightAliases.isEmpty() ? null : scopedRightAliases.get(0); + scopedLeftAliases.forEach(alias -> { + if (!leftAliases.contains(alias)) { + leftAliases.add(alias); } - } + }); + scopedRightAliases.forEach(alias -> { + if (!rightAliases.contains(alias)) { + rightAliases.add(alias); + } + }); parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1), leftAliases, rightAliases, fields1, aliasToOpInfo);