Skip to content

Commit fc31408

Browse files
Merge pull request ClickHouse#79942 from ClickHouse/allow-join-using-alias
Allow specify alias in JOIN USING
2 parents 8439810 + de27fd5 commit fc31408

17 files changed

+204
-17
lines changed

src/Analyzer/JoinNode.cpp

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <Analyzer/JoinNode.h>
2+
#include <Analyzer/ColumnNode.h>
23
#include <Analyzer/ListNode.h>
34
#include <Analyzer/Utils.h>
45
#include <IO/Operators.h>
@@ -37,6 +38,77 @@ JoinNode::JoinNode(QueryTreeNodePtr left_table_expression_,
3738
children[join_expression_child_index] = std::move(join_expression_);
3839
}
3940

41+
/// There is a special workaround for the case when ARRAY JOIN alias is used in USING statement.
42+
/// Example: ... ARRAY JOIN arr AS dummy INNER JOIN system.one USING (dummy);
43+
///
44+
/// In case of ARRAY JOIN, the column is renamed, so the query tree will look like:
45+
/// JOIN EXPRESSION
46+
/// LIST
47+
/// COLUMN id: 16, column_name: dummy
48+
/// EXPRESSION
49+
/// LIST
50+
/// COLUMN id: 18, column_name: __array_join_exp_1
51+
/// COLUMN id: 19, column_name: dummy
52+
///
53+
/// Previously, when we convert QueryTree back to ast, the query would look like:
54+
/// ARRAY JOIN arr AS __array_join_exp_1 ALL INNER JOIN system.one USING (__array_join_exp_1)
55+
/// Which is incorrect query (which is broken in distributed case) because system.one do not have __array_join_exp_1.
56+
///
57+
/// In order to mitigate this, the syntax 'USING (__array_join_exp_1 AS dummy)' is introduced,
58+
/// which means that '__array_join_exp_1' is taken from left, 'dummy' is taken from right,
59+
/// and the USING column name is also 'dummy'
60+
///
61+
/// See 03448_analyzer_array_join_alias_in_join_using_bug
62+
static ASTPtr tryMakeUsingColumnASTWithAlias(const QueryTreeNodePtr & node)
63+
{
64+
const auto * column_node = node->as<ColumnNode>();
65+
if (!column_node)
66+
return nullptr;
67+
68+
const auto & expr = column_node->getExpression();
69+
if (!expr)
70+
return nullptr;
71+
72+
const auto * expr_list_node = expr->as<ListNode>();
73+
if (!expr_list_node)
74+
return nullptr;
75+
76+
if (expr_list_node->getNodes().size() != 2)
77+
return nullptr;
78+
79+
const auto * lhs_column_node = expr_list_node->getNodes()[0]->as<ColumnNode>();
80+
const auto * rhs_column_node = expr_list_node->getNodes()[1]->as<ColumnNode>();
81+
if (!lhs_column_node || !rhs_column_node)
82+
return nullptr;
83+
84+
if (lhs_column_node->getColumnName() == rhs_column_node->getColumnName())
85+
return nullptr;
86+
87+
auto node_ast = std::make_shared<ASTIdentifier>(lhs_column_node->getColumnName());
88+
node_ast->setAlias(rhs_column_node->getColumnName());
89+
return node_ast;
90+
}
91+
92+
static ASTPtr makeUsingAST(const QueryTreeNodePtr & node)
93+
{
94+
const auto & list_node = node->as<ListNode &>();
95+
96+
auto expr_list = std::make_shared<ASTExpressionList>();
97+
expr_list->children.reserve(list_node.getNodes().size());
98+
99+
for (const auto & child : list_node.getNodes())
100+
{
101+
ASTPtr node_ast = tryMakeUsingColumnASTWithAlias(child);
102+
103+
if (!node_ast)
104+
node_ast = child->toAST();
105+
106+
expr_list->children.push_back(std::move(node_ast));
107+
}
108+
109+
return expr_list;
110+
}
111+
40112
ASTPtr JoinNode::toASTTableJoin() const
41113
{
42114
auto join_ast = std::make_shared<ASTTableJoin>();
@@ -46,16 +118,14 @@ ASTPtr JoinNode::toASTTableJoin() const
46118

47119
if (children[join_expression_child_index])
48120
{
49-
auto join_expression_ast = children[join_expression_child_index]->toAST();
50-
51121
if (is_using_join_expression)
52122
{
53-
join_ast->using_expression_list = join_expression_ast;
123+
join_ast->using_expression_list = makeUsingAST(children[join_expression_child_index]);
54124
join_ast->children.push_back(join_ast->using_expression_list);
55125
}
56126
else
57127
{
58-
join_ast->on_expression = join_expression_ast;
128+
join_ast->on_expression = children[join_expression_child_index]->toAST();
59129
join_ast->children.push_back(join_ast->on_expression);
60130
}
61131
}

src/Analyzer/Resolve/QueryAnalyzer.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5307,9 +5307,11 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
53075307
if (settings[Setting::analyzer_compatibility_join_using_top_level_identifier])
53085308
result_left_table_expression = try_resolve_identifier_from_query_projection(identifier_full_name, join_node_typed.getLeftTableExpression(), scope);
53095309

5310-
IdentifierLookup identifier_lookup{identifier_node->getIdentifier(), IdentifierLookupContext::EXPRESSION};
53115310
if (!result_left_table_expression)
5311+
{
5312+
IdentifierLookup identifier_lookup{identifier_node->getIdentifier(), IdentifierLookupContext::EXPRESSION};
53125313
result_left_table_expression = identifier_resolver.tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getLeftTableExpression(), scope).resolved_identifier;
5314+
}
53135315

53145316
/** Here we may try to resolve identifier from projection in case it's not resolved from left table expression
53155317
* and analyzer_compatibility_join_using_top_level_identifier is disabled.
@@ -5353,19 +5355,23 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
53535355
identifier_full_name,
53545356
scope.scope_node->formatASTForErrorMessage());
53555357

5358+
/// Here we allow syntax 'USING (a AS b)' which means that 'a' is taken from left and 'b' is taken from right.
5359+
/// See 03449_join_using_allow_alias.sql and the comment in JoinNode.cpp
5360+
const auto & right_name = identifier_node->hasAlias() ? identifier_node->getAlias() : identifier_full_name;
5361+
IdentifierLookup identifier_lookup{Identifier(right_name), IdentifierLookupContext::EXPRESSION};
53565362
auto result_right_table_expression = identifier_resolver.tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getRightTableExpression(), scope).resolved_identifier;
53575363
if (!result_right_table_expression)
53585364
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
53595365
"JOIN {} using identifier '{}' cannot be resolved from right table expression. In scope {}",
53605366
join_node_typed.formatASTForErrorMessage(),
5361-
identifier_full_name,
5367+
right_name,
53625368
scope.scope_node->formatASTForErrorMessage());
53635369

53645370
if (result_right_table_expression->getNodeType() != QueryTreeNodeType::COLUMN)
53655371
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
53665372
"JOIN {} using identifier '{}' must be resolved into column node from right table expression. In scope {}",
53675373
join_node_typed.formatASTForErrorMessage(),
5368-
identifier_full_name,
5374+
right_name,
53695375
scope.scope_node->formatASTForErrorMessage());
53705376

53715377
auto expression_types = DataTypes{result_left_table_expression->getResultType(), result_right_table_expression->getResultType()};
@@ -5377,7 +5383,7 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
53775383
join_node_typed.formatASTForErrorMessage(),
53785384
result_left_table_expression->getResultType()->getName(),
53795385
result_right_table_expression->getResultType()->getName(),
5380-
identifier_full_name,
5386+
right_name,
53815387
scope.scope_node->formatASTForErrorMessage());
53825388

53835389
NameAndTypePair join_using_column(identifier_full_name, common_type);

src/Interpreters/ClusterProxy/executeQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ void executeQueryWithParallelReplicas(
749749
{
750750
QueryTreeNodePtr modified_query_tree = query_tree->clone();
751751
rewriteJoinToGlobalJoin(modified_query_tree, context);
752-
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree);
752+
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree, /*allow_global_join_for_right_table*/ true);
753753

754754
auto header
755755
= InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_tree, context, SelectQueryOptions(processed_stage).analyze());

src/Parsers/ASTTablesInSelectQuery.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include <IO/Operators.h>
66
#include <Parsers/ASTFunction.h>
77

8-
98
namespace DB
109
{
1110

@@ -252,6 +251,10 @@ void ASTTableJoin::formatImplAfterTable(WriteBuffer & ostr, const FormatSettings
252251
{
253252
ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : "");
254253
ostr << "(";
254+
/// We should always print alias for 'USING (a AS b)' syntax (supported with analyzer only).
255+
/// Otherwise query like 'SELECT a AS b FROM t1 JOIN t2 USING (a AS b)' will be broken.
256+
/// See 03448_analyzer_array_join_alias_in_join_using_bug.sql
257+
frame.ignore_printed_asts_with_alias = true;
255258
using_expression_list->format(ostr, settings, state, frame);
256259
ostr << ")";
257260
}

src/Parsers/ASTWithAlias.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ void ASTWithAlias::formatImpl(WriteBuffer & ostr, const FormatSettings & setting
1919
{
2020
/// If we have previously output this node elsewhere in the query, now it is enough to output only the alias.
2121
/// This is needed because the query can become extraordinary large after substitution of aliases.
22-
if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash(/*ignore_aliases=*/ true)).second)
22+
if (!alias.empty() && !frame.ignore_printed_asts_with_alias && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash(/*ignore_aliases=*/ true)).second)
2323
{
2424
ostr << (settings.hilite ? IAST::hilite_identifier : "");
2525
settings.writeIdentifier(ostr, alias, /*ambiguous=*/false);

src/Parsers/IAST.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ class IAST : public std::enable_shared_from_this<IAST>, public TypePromotion<IAS
249249
bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element.
250250
bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required)
251251
bool surround_each_list_element_with_parens = false;
252+
bool ignore_printed_asts_with_alias = false; /// Ignore FormatState::printed_asts_with_alias
252253
bool allow_operators = true; /// Format some functions, such as "plus", "in", etc. as operators.
253254
size_t list_element_index = 0;
254255
std::string create_engine_name;

src/Planner/findParallelReplicasQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas(
494494
modified_query_tree, context, SelectQueryOptions(processed_stage).analyze());
495495

496496
rewriteJoinToGlobalJoin(modified_query_tree, context);
497-
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree);
497+
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree, /*allow_global_join_for_right_table*/ true);
498498
ASTPtr modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree);
499499

500500
Block header = InterpreterSelectQueryAnalyzer::getSampleBlock(

src/Storages/StorageDistributed.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
930930
rewriteJoinToGlobalJoinIfNeeded(query_node.getJoinTree());
931931
}
932932

933-
return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify);
933+
return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify, /*allow_global_join_for_right_table*/ false);
934934
}
935935

936936
}

src/Storages/buildQueryTreeForShard.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ QueryTreeNodePtr getSubqueryFromTableExpression(
352352

353353
}
354354

355-
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify)
355+
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table)
356356
{
357357
CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor;
358358
collect_column_source_to_columns_visitor.visit(query_tree_to_modify);
@@ -373,7 +373,7 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex
373373
{
374374
QueryTreeNodePtr join_table_expression;
375375
const auto join_kind = join_node->getKind();
376-
if (join_kind == JoinKind::Left || join_kind == JoinKind::Inner)
376+
if (!allow_global_join_for_right_table || join_kind == JoinKind::Left || join_kind == JoinKind::Inner)
377377
{
378378
join_table_expression = join_node->getRightTableExpression();
379379
}

src/Storages/buildQueryTreeForShard.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
1616
class Context;
1717
using ContextPtr = std::shared_ptr<const Context>;
1818

19-
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify);
19+
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table);
2020

2121
void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify, ContextPtr context);
2222

0 commit comments

Comments
 (0)