Skip to content

Commit f6ada1f

Browse files
authored
Merge pull request ClickHouse#75676 from ClickHouse/vidmir/issue_73142
Fix column name clash with `join_using_top_level_identifier`
2 parents b59e216 + f0cb85b commit f6ada1f

8 files changed

+182
-8
lines changed

src/Analyzer/JoinNode.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ static ASTPtr tryMakeUsingColumnASTWithAlias(const QueryTreeNodePtr & node)
8181
if (!lhs_column_node || !rhs_column_node)
8282
return nullptr;
8383

84+
/// If USING column resolved from projection, keep its name
85+
if (lhs_column_node->hasExpression())
86+
return nullptr;
87+
8488
if (lhs_column_node->getColumnName() == rhs_column_node->getColumnName())
8589
return nullptr;
8690

src/Analyzer/Resolve/QueryAnalyzer.cpp

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,19 +1743,26 @@ bool hasTableExpressionInJoinTree(const QueryTreeNodePtr & join_tree_node, const
17431743
if (node_to_process == table_expression)
17441744
return true;
17451745

1746-
if (node_to_process->getNodeType() == QueryTreeNodeType::JOIN)
1746+
auto node_type = node_to_process->getNodeType();
1747+
1748+
if (node_type == QueryTreeNodeType::JOIN)
17471749
{
17481750
const auto & join_node = node_to_process->as<JoinNode &>();
17491751
nodes_to_process.push_back(join_node.getLeftTableExpression());
17501752
nodes_to_process.push_back(join_node.getRightTableExpression());
17511753
}
1752-
1753-
if (node_to_process->getNodeType() == QueryTreeNodeType::CROSS_JOIN)
1754+
else if (node_type == QueryTreeNodeType::CROSS_JOIN)
17541755
{
17551756
const auto & join_node = node_to_process->as<CrossJoinNode &>();
17561757
for (const auto & expr : join_node.getTableExpressions())
17571758
nodes_to_process.push_back(expr);
17581759
}
1760+
else if (node_type == QueryTreeNodeType::ARRAY_JOIN)
1761+
{
1762+
const auto & array_join_node = node_to_process->as<ArrayJoinNode &>();
1763+
nodes_to_process.push_back(array_join_node.getTableExpression());
1764+
}
1765+
17591766
}
17601767
return false;
17611768
}
@@ -2203,6 +2210,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
22032210
table_expression_columns,
22042211
scope);
22052212

2213+
updateMatchedColumnsFromJoinUsing(matched_column_nodes_with_names, table_expression, scope);
2214+
22062215
table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_column_nodes_with_names));
22072216
}
22082217

@@ -5202,6 +5211,58 @@ void QueryAnalyzer::resolveCrossJoin(QueryTreeNodePtr & cross_join_node, Identif
52025211
}
52035212
}
52045213

5214+
static NameSet getColumnsFromTableExpression(const QueryTreeNodePtr & table_expression)
5215+
{
5216+
NameSet existing_columns;
5217+
switch (table_expression->getNodeType())
5218+
{
5219+
case QueryTreeNodeType::TABLE: {
5220+
const auto * table_node = table_expression->as<TableNode>();
5221+
chassert(table_node);
5222+
5223+
auto get_column_options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns();
5224+
for (const auto & column : table_node->getStorageSnapshot()->getColumns(get_column_options))
5225+
existing_columns.insert(column.name);
5226+
5227+
return existing_columns;
5228+
}
5229+
case QueryTreeNodeType::TABLE_FUNCTION: {
5230+
const auto * table_function_node = table_expression->as<TableFunctionNode>();
5231+
chassert(table_function_node);
5232+
5233+
auto get_column_options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns();
5234+
for (const auto & column : table_function_node->getStorageSnapshot()->getColumns(get_column_options))
5235+
existing_columns.insert(column.name);
5236+
5237+
return existing_columns;
5238+
}
5239+
case QueryTreeNodeType::QUERY: {
5240+
const auto * query_node = table_expression->as<QueryNode>();
5241+
chassert(query_node);
5242+
5243+
for (const auto & column : query_node->getProjectionColumns())
5244+
existing_columns.insert(column.name);
5245+
5246+
return existing_columns;
5247+
}
5248+
case QueryTreeNodeType::UNION: {
5249+
const auto * union_node = table_expression->as<UnionNode>();
5250+
chassert(union_node);
5251+
5252+
for (const auto & column : union_node->computeProjectionColumns())
5253+
existing_columns.insert(column.name);
5254+
5255+
return existing_columns;
5256+
}
5257+
default:
5258+
throw Exception(
5259+
ErrorCodes::LOGICAL_ERROR,
5260+
"Expected TableNode, TableFunctionNode, QueryNode or UnionNode, got {}: {}",
5261+
table_expression->getNodeTypeName(),
5262+
table_expression->formatASTForErrorMessage());
5263+
}
5264+
}
5265+
52055266
/// Resolve join node in scope
52065267
void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor)
52075268
{
@@ -5286,11 +5347,15 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
52865347
const auto & resolved_nodes = left_subquery->getProjection().getNodes();
52875348
if (resolved_nodes.size() == 1)
52885349
{
5350+
/// Added column should not conflict with existing column names
5351+
NameSet existing_columns = getColumnsFromTableExpression(left_table_expression);
5352+
5353+
NameAndTypePair column_name_type(identifier_full_name_, resolved_nodes.front()->getResultType());
5354+
while (existing_columns.contains(column_name_type.name))
5355+
column_name_type.name = "_" + column_name_type.name;
5356+
52895357
/// Create ColumnNode with expression from parent projection
5290-
return std::make_shared<ColumnNode>(
5291-
NameAndTypePair{identifier_full_name_, resolved_nodes.front()->getResultType()},
5292-
resolved_nodes.front(),
5293-
left_table_expression);
5358+
return std::make_shared<ColumnNode>(std::move(column_name_type), resolved_nodes.front(), left_table_expression);
52945359
}
52955360
}
52965361
}
@@ -5324,7 +5389,7 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
53245389
{
53255390
String extra_message;
53265391
const QueryNode * query_node = scope.scope_node ? scope.scope_node->as<QueryNode>() : nullptr;
5327-
if (settings[Setting::analyzer_compatibility_join_using_top_level_identifier] && query_node)
5392+
if (!settings[Setting::analyzer_compatibility_join_using_top_level_identifier] && query_node)
53285393
{
53295394
for (const auto & projection_node : query_node->getProjection().getNodes())
53305395
{

tests/queries/0_stateless/02989_join_using_parent_scope.reference

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
-- { echoOn }
2+
SELECT 1 AS c0 FROM (SELECT 1 AS c1) t0 JOIN (SELECT 1 AS c0) t1 USING (c0);
3+
1
4+
SELECT 1 AS c0 FROM (SELECT 1 AS c0) t0 JOIN (SELECT 1 AS c0) t1 USING (c0);
5+
1
26
SELECT 1 AS a FROM tb JOIN tabc USING (a) ORDER BY ALL;
37
1
48
1
@@ -87,6 +91,7 @@ SELECT b + 1 AS a, s FROM tb FULL OUTER JOIN tabc USING (a) PREWHERE a > 2 ORDER
8791
\N abc0
8892
\N abc1
8993
\N abc2
94+
EXPLAIN PIPELINE SELECT (SELECT 1) AS c0 FROM (SELECT 1 AS c0, 1 AS c1) tx JOIN (SELECT 0 AS c0, 1 AS c1) ty USING (c0, c1) FORMAT Null SETTINGS enable_analyzer = 1;
9095
-- It's a default behavior for old analyzer and new with analyzer_compatibility_join_using_top_level_identifier
9196
-- Column `b` actually exists in left table, but `b` from USING is resoled to `a + 2` and `a` is not in left table
9297
-- so we get UNKNOWN_IDENTIFIER error.

tests/queries/0_stateless/02989_join_using_parent_scope.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ SET join_use_nulls = 1;
1515
SET analyzer_compatibility_join_using_top_level_identifier = 1;
1616

1717
-- { echoOn }
18+
SELECT 1 AS c0 FROM (SELECT 1 AS c1) t0 JOIN (SELECT 1 AS c0) t1 USING (c0);
19+
SELECT 1 AS c0 FROM (SELECT 1 AS c0) t0 JOIN (SELECT 1 AS c0) t1 USING (c0);
20+
1821
SELECT 1 AS a FROM tb JOIN tabc USING (a) ORDER BY ALL;
1922
SELECT a + 2 AS b FROM ta JOIN tabc USING (b) ORDER BY ALL;
2023
SELECT b + 2 AS a FROM tb JOIN tabc USING (a) ORDER BY ALL;
@@ -40,6 +43,7 @@ SELECT b + 1 AS a, * FROM (SELECT b FROM tb) t1 FULL JOIN (SELECT a, b FROM tabc
4043

4144
SELECT b + 1 AS a, s FROM tb FULL OUTER JOIN tabc USING (a) PREWHERE a > 2 ORDER BY ALL SETTINGS enable_analyzer = 1;
4245

46+
EXPLAIN PIPELINE SELECT (SELECT 1) AS c0 FROM (SELECT 1 AS c0, 1 AS c1) tx JOIN (SELECT 0 AS c0, 1 AS c1) ty USING (c0, c1) FORMAT Null SETTINGS enable_analyzer = 1;
4347

4448
-- It's a default behavior for old analyzer and new with analyzer_compatibility_join_using_top_level_identifier
4549
-- Column `b` actually exists in left table, but `b` from USING is resoled to `a + 2` and `a` is not in left table
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
-- { echoOn }
2+
3+
SELECT b FROM t1;
4+
1
5+
1
6+
1
7+
SELECT b FROM t1 JOIN t2 USING b;
8+
1
9+
1
10+
1
11+
1
12+
1
13+
1
14+
1
15+
1
16+
1
17+
SELECT 1 AS b FROM t1 JOIN t2 USING b;
18+
1
19+
1
20+
1
21+
1
22+
1
23+
1
24+
1
25+
1
26+
1
27+
SELECT 1 AS b FROM t1 JOIN t2 USING b SETTINGS analyzer_compatibility_join_using_top_level_identifier = 1;
28+
1
29+
1
30+
1
31+
1
32+
1
33+
1
34+
1
35+
1
36+
1
37+
SELECT 2 AS a FROM t1 JOIN t2 USING a SETTINGS analyzer_compatibility_join_using_top_level_identifier = 1;
38+
2
39+
2
40+
2
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
DROP TABLE IF EXISTS t1;
3+
CREATE TABLE t1 (a Int32, b Int32 ALIAS 1) ENGINE = MergeTree ORDER BY tuple();
4+
INSERT INTO t1 VALUES (1), (2), (3);
5+
6+
DROP TABLE IF EXISTS t2;
7+
CREATE TABLE t2 (a Int32, b Int32 ALIAS 1) ENGINE = MergeTree ORDER BY tuple();
8+
INSERT INTO t2 VALUES (2), (3), (4);
9+
10+
-- { echoOn }
11+
12+
SELECT b FROM t1;
13+
SELECT b FROM t1 JOIN t2 USING b;
14+
SELECT 1 AS b FROM t1 JOIN t2 USING b;
15+
SELECT 1 AS b FROM t1 JOIN t2 USING b SETTINGS analyzer_compatibility_join_using_top_level_identifier = 1;
16+
SELECT 2 AS a FROM t1 JOIN t2 USING a SETTINGS analyzer_compatibility_join_using_top_level_identifier = 1;
17+
18+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2 3 2
2+
3 4 3
3+
4 5 4
4+
5 6 5
5+
2 2
6+
3 3
7+
4 4
8+
5 5
9+
3 2
10+
4 3
11+
5 4
12+
6 5
13+
(1,2,1) 2 2
14+
(2,3,1) 3 3
15+
(3,4,1) 4 4
16+
(4,5,1) 5 5
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env -S ${HOME}/clickhouse-client --queries-file
2+
3+
4+
DROP TABLE IF EXISTS t1;
5+
DROP TABLE IF EXISTS t2;
6+
7+
CREATE TABLE t1 (`b` Float64) ENGINE = MergeTree ORDER BY tuple();
8+
INSERT INTO t1 VALUES (1.0), (2.0), (3.0), (4.0), (5.0);
9+
10+
CREATE TABLE t2 (`a` UInt32) ENGINE = MergeTree ORDER BY a;
11+
INSERT INTO t2 VALUES (1), (2), (3), (4), (5);
12+
13+
SET enable_analyzer = 1;
14+
15+
SET analyzer_compatibility_join_using_top_level_identifier = 1;
16+
17+
SELECT * APPLY ((x) -> x+1), b + 1 AS a FROM t1 INNER JOIN t2 USING (a) ORDER BY ALL;
18+
SELECT t1.* APPLY ((x) -> x+1), b + 1 AS a FROM t1 INNER JOIN t2 USING (a) ORDER BY ALL;
19+
SELECT t2.* APPLY ((x) -> x+1), b + 1 AS a FROM t1 INNER JOIN t2 USING (a) ORDER BY ALL;
20+
21+
SELECT (*, 1), b + 1 AS a, b + 1 AS a FROM t1 INNER JOIN t2 USING (a) ORDER BY ALL;
22+

0 commit comments

Comments
 (0)