Skip to content

Commit 49951e8

Browse files
Fix fuzzing. Add comments
1 parent 2d850d4 commit 49951e8

File tree

10 files changed

+71
-29
lines changed

10 files changed

+71
-29
lines changed

src/Analyzer/JoinNode.cpp

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,57 @@ JoinNode::JoinNode(QueryTreeNodePtr left_table_expression_,
3838
children[join_expression_child_index] = std::move(join_expression_);
3939
}
4040

41+
/// There is a special workaround for the case when ARRAY JOIN alias is used in USING statement.
42+
/// Example: ... ARRAY JOIN arr AS dummy INNER JOIN system.one USING (dummy);
43+
///
44+
/// In case of ARRAY JOIN, the column is renamed, so the query tree will look like:
45+
/// JOIN EXPRESSION
46+
/// LIST
47+
/// COLUMN id: 16, column_name: dummy
48+
/// EXPRESSION
49+
/// LIST
50+
/// COLUMN id: 18, column_name: __array_join_exp_1
51+
/// COLUMN id: 19, column_name: dummy
52+
///
53+
/// Previously, when we convert QueryTree back to ast, the query would look like:
54+
/// ARRAY JOIN arr AS __array_join_exp_1 ALL INNER JOIN system.one USING (__array_join_exp_1)
55+
/// Which is incorrect query (which is broken in distributed case) because system.one do not have __array_join_exp_1.
56+
///
57+
/// In order to mitigate this, the syntax 'USING (__array_join_exp_1 AS dummy)' is introduced,
58+
/// which means that '__array_join_exp_1' is taken from left, 'dummy' is taken from right,
59+
/// and the USING column name is also 'dummy'
60+
///
61+
/// See 03448_analyzer_array_join_alias_in_join_using_bug
62+
static ASTPtr tryMakeUsingColumnASTWithAlias(const QueryTreeNodePtr & node)
63+
{
64+
const auto * column_node = node->as<ColumnNode>();
65+
if (!column_node)
66+
return nullptr;
67+
68+
const auto & expr = column_node->getExpression();
69+
if (!expr)
70+
return nullptr;
71+
72+
const auto * expr_list_node = expr->as<ListNode>();
73+
if (!expr_list_node)
74+
return nullptr;
75+
76+
if (expr_list_node->getNodes().size() != 2)
77+
return nullptr;
78+
79+
const auto * lhs_column_node = expr_list_node->getNodes()[0]->as<ColumnNode>();
80+
const auto * rhs_column_node = expr_list_node->getNodes()[1]->as<ColumnNode>();
81+
if (!lhs_column_node || !rhs_column_node)
82+
return nullptr;
83+
84+
if (lhs_column_node->getColumnName() == rhs_column_node->getColumnName())
85+
return nullptr;
86+
87+
auto node_ast = std::make_shared<ASTIdentifier>(lhs_column_node->getColumnName());
88+
node_ast->setAlias(rhs_column_node->getColumnName());
89+
return node_ast;
90+
}
91+
4192
static ASTPtr makeUsingAST(const QueryTreeNodePtr & node)
4293
{
4394
const auto & list_node = node->as<ListNode &>();
@@ -47,29 +98,7 @@ static ASTPtr makeUsingAST(const QueryTreeNodePtr & node)
4798

4899
for (const auto & child : list_node.getNodes())
49100
{
50-
ASTPtr node_ast;
51-
if (const auto * column_node = child->as<ColumnNode>())
52-
{
53-
if (const auto expr = column_node->getExpression())
54-
{
55-
if (const auto * expr_list_node = expr->as<ListNode>())
56-
{
57-
if (expr_list_node->getNodes().size() == 2)
58-
{
59-
const auto * lhs_column_node = expr_list_node->getNodes()[0]->as<ColumnNode>();
60-
const auto * rhs_column_node = expr_list_node->getNodes()[1]->as<ColumnNode>();
61-
if (lhs_column_node && rhs_column_node)
62-
{
63-
if (lhs_column_node->getColumnName() != rhs_column_node->getColumnName())
64-
{
65-
node_ast = std::make_shared<ASTIdentifier>(lhs_column_node->getColumnName());
66-
node_ast->setAlias(rhs_column_node->getColumnName());
67-
}
68-
}
69-
}
70-
}
71-
}
72-
}
101+
ASTPtr node_ast = tryMakeUsingColumnASTWithAlias(child);
73102

74103
if (!node_ast)
75104
node_ast = child->toAST();

src/Analyzer/Resolve/QueryAnalyzer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5355,6 +5355,8 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
53555355
identifier_full_name,
53565356
scope.scope_node->formatASTForErrorMessage());
53575357

5358+
/// Here we allow syntax 'USING (a AS b)' which means that 'a' is taken from left and 'b' is taken from right.
5359+
/// See 03449_join_using_allow_alias.sql and the comment in JoinNode.cpp
53585360
const auto & right_name = identifier_node->hasAlias() ? identifier_node->getAlias() : identifier_full_name;
53595361
IdentifierLookup identifier_lookup{Identifier(right_name), IdentifierLookupContext::EXPRESSION};
53605362
auto result_right_table_expression = identifier_resolver.tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getRightTableExpression(), scope).resolved_identifier;

src/Interpreters/ClusterProxy/executeQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ void executeQueryWithParallelReplicas(
749749
{
750750
QueryTreeNodePtr modified_query_tree = query_tree->clone();
751751
rewriteJoinToGlobalJoin(modified_query_tree, context);
752-
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree);
752+
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree, /*allow_global_join_for_right_table*/ true);
753753

754754
auto header
755755
= InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_tree, context, SelectQueryOptions(processed_stage).analyze());

src/Parsers/ASTTablesInSelectQuery.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ void ASTTableJoin::formatImplAfterTable(WriteBuffer & ostr, const FormatSettings
251251
{
252252
ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : "");
253253
ostr << "(";
254+
/// We should always print alias for 'USING (a AS b)' syntax (supported with analyzer only).
255+
/// Otherwise query like 'SELECT a AS b FROM t1 JOIN t2 USING (a AS b)' will be broken.
256+
/// See 03448_analyzer_array_join_alias_in_join_using_bug.sql
254257
frame.ignore_printed_asts_with_alias = true;
255258
using_expression_list->format(ostr, settings, state, frame);
256259
ostr << ")";

src/Planner/findParallelReplicasQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas(
494494
modified_query_tree, context, SelectQueryOptions(processed_stage).analyze());
495495

496496
rewriteJoinToGlobalJoin(modified_query_tree, context);
497-
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree);
497+
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree, /*allow_global_join_for_right_table*/ true);
498498
ASTPtr modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree);
499499

500500
Block header = InterpreterSelectQueryAnalyzer::getSampleBlock(

src/Storages/StorageDistributed.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info,
930930
rewriteJoinToGlobalJoinIfNeeded(query_node.getJoinTree());
931931
}
932932

933-
return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify);
933+
return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify, /*allow_global_join_for_right_table*/ false);
934934
}
935935

936936
}

src/Storages/buildQueryTreeForShard.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ QueryTreeNodePtr getSubqueryFromTableExpression(
352352

353353
}
354354

355-
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify)
355+
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table)
356356
{
357357
CollectColumnSourceToColumnsVisitor collect_column_source_to_columns_visitor;
358358
collect_column_source_to_columns_visitor.visit(query_tree_to_modify);
@@ -373,7 +373,7 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex
373373
{
374374
QueryTreeNodePtr join_table_expression;
375375
const auto join_kind = join_node->getKind();
376-
if (join_kind == JoinKind::Left || join_kind == JoinKind::Inner)
376+
if (!allow_global_join_for_right_table || join_kind == JoinKind::Left || join_kind == JoinKind::Inner)
377377
{
378378
join_table_expression = join_node->getRightTableExpression();
379379
}

src/Storages/buildQueryTreeForShard.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ using PlannerContextPtr = std::shared_ptr<PlannerContext>;
1616
class Context;
1717
using ContextPtr = std::shared_ptr<const Context>;
1818

19-
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify);
19+
QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_context, QueryTreeNodePtr query_tree_to_modify, bool allow_global_join_for_right_table);
2020

2121
void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify, ContextPtr context);
2222

tests/queries/0_stateless/03448_analyzer_array_join_alias_in_join_using_bug.reference

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,7 @@ INNER JOIN (SELECT 1 + number as arr_item from numbers(2)) AS foo USING (arr_ite
3636
[0,1,2] 2
3737
[0,1,2] 1
3838
[0,1,2] 2
39+
-- Fuzzed
40+
41+
SELECT arr FROM remote('127.0.0.2', currentDatabase(), local_table) AS r ARRAY JOIN arr AS arr_item GLOBAL RIGHT JOIN (SELECT 1 AS arr_item) AS foo USING (arr_item);
42+
[0,1,2]

tests/queries/0_stateless/03448_analyzer_array_join_alias_in_join_using_bug.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,7 @@ SELECT arr, arr_item
3939
FROM remote('127.0.0.{1,2}', currentDatabase(), local_table) r
4040
ARRAY JOIN arr AS arr_item
4141
INNER JOIN (SELECT 1 + number as arr_item from numbers(2)) AS foo USING (arr_item);
42+
43+
-- Fuzzed
44+
45+
SELECT arr FROM remote('127.0.0.2', currentDatabase(), local_table) AS r ARRAY JOIN arr AS arr_item GLOBAL RIGHT JOIN (SELECT 1 AS arr_item) AS foo USING (arr_item);

0 commit comments

Comments
 (0)