Skip to content

Commit b9b996d

Browse files
Merge pull request ClickHouse#63519 from ClickHouse/analyzer-fix-join-projection-name
Analyzer: Fix column projection name after column type promotion in join
2 parents 7b47a42 + 5eeba0a commit b9b996d

8 files changed

+203
-40
lines changed

src/Analyzer/Resolve/IdentifierResolveScope.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ struct IdentifierResolveScope
147147
/// Table column name to column node. Valid only during table ALIAS columns resolve.
148148
ColumnNameToColumnNodeMap column_name_to_column_node;
149149

150+
std::list<std::unordered_map<std::string, ColumnNodePtr> *> join_using_columns;
151+
150152
/// CTE name to query node
151153
std::unordered_map<std::string, QueryTreeNodePtr> cte_name_to_query_node;
152154

src/Analyzer/Resolve/IdentifierResolver.cpp

Lines changed: 87 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <Analyzer/Resolve/TypoCorrection.h>
3131

3232
#include <Core/Settings.h>
33+
#include <iostream>
3334

3435
namespace DB
3536
{
@@ -329,6 +330,21 @@ bool IdentifierResolver::tryBindIdentifierToAliases(const IdentifierLookup & ide
329330
return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr;
330331
}
331332

333+
bool IdentifierResolver::tryBindIdentifierToJoinUsingColumn(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope)
334+
{
335+
for (const auto * join_using : scope.join_using_columns)
336+
{
337+
for (const auto & [using_column_name, _] : *join_using)
338+
{
339+
// std::cerr << identifier_lookup.identifier.getFullName() << " <===========> " << using_column_name << std::endl;
340+
if (identifier_lookup.identifier.getFullName() == using_column_name)
341+
return true;
342+
}
343+
}
344+
345+
return false;
346+
}
347+
332348
/** Resolve identifier from table columns.
333349
*
334350
* 1. If table column nodes are empty or identifier is not expression lookup return nullptr.
@@ -634,11 +650,15 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromStorage(
634650
tryBindIdentifierToTableExpressions(column_identifier_lookup, table_expression_node, scope))
635651
break;
636652

653+
if (tryBindIdentifierToJoinUsingColumn(column_identifier_lookup, scope))
654+
break;
655+
637656
qualified_identifier = std::move(qualified_identifier_with_removed_part);
638657
}
639658

640659
auto qualified_identifier_full_name = qualified_identifier.getFullName();
641660
node_to_projection_name.emplace(result_expression, std::move(qualified_identifier_full_name));
661+
// std::cerr << "resolved from storage : " << qualified_identifier.getFullName() << " as " << result_expression->dumpTree() << std::endl;
642662

643663
return { .resolved_identifier = result_expression, .resolve_place = IdentifierResolvePlace::JOIN_TREE };
644664
}
@@ -857,8 +877,36 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I
857877
IdentifierResolveScope & scope)
858878
{
859879
const auto & from_join_node = table_expression_node->as<const JoinNode &>();
860-
auto left_resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_join_node.getLeftTableExpression(), scope).resolved_identifier;
861-
auto right_resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_join_node.getRightTableExpression(), scope).resolved_identifier;
880+
JoinKind join_kind = from_join_node.getKind();
881+
882+
bool join_node_in_resolve_process = scope.table_expressions_in_resolve_process.contains(table_expression_node.get());
883+
std::unordered_map<std::string, ColumnNodePtr> join_using_column_name_to_column_node;
884+
885+
if (!join_node_in_resolve_process && from_join_node.isUsingJoinExpression())
886+
{
887+
auto & join_using_list = from_join_node.getJoinExpression()->as<ListNode &>();
888+
for (auto & join_using_node : join_using_list.getNodes())
889+
{
890+
auto & column_node = join_using_node->as<ColumnNode &>();
891+
join_using_column_name_to_column_node.emplace(column_node.getColumnName(), std::static_pointer_cast<ColumnNode>(join_using_node));
892+
}
893+
}
894+
895+
auto try_resolve_identifier_from_join_tree_node = [&](const QueryTreeNodePtr & join_tree_node, bool may_be_override_by_using_column)
896+
{
897+
if (may_be_override_by_using_column && !join_using_column_name_to_column_node.empty())
898+
scope.join_using_columns.push_back(&join_using_column_name_to_column_node);
899+
900+
auto res = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_tree_node, scope);
901+
902+
if (may_be_override_by_using_column && !join_using_column_name_to_column_node.empty())
903+
scope.join_using_columns.pop_back();
904+
905+
return std::move(res.resolved_identifier);
906+
};
907+
908+
auto left_resolved_identifier = try_resolve_identifier_from_join_tree_node(from_join_node.getLeftTableExpression(), join_kind == JoinKind::Right);
909+
auto right_resolved_identifier = try_resolve_identifier_from_join_tree_node(from_join_node.getRightTableExpression(), join_kind != JoinKind::Right);
862910

863911
if (!identifier_lookup.isExpressionLookup())
864912
{
@@ -875,20 +923,6 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I
875923
};
876924
}
877925

878-
bool join_node_in_resolve_process = scope.table_expressions_in_resolve_process.contains(table_expression_node.get());
879-
880-
std::unordered_map<std::string, ColumnNodePtr> join_using_column_name_to_column_node;
881-
882-
if (!join_node_in_resolve_process && from_join_node.isUsingJoinExpression())
883-
{
884-
auto & join_using_list = from_join_node.getJoinExpression()->as<ListNode &>();
885-
for (auto & join_using_node : join_using_list.getNodes())
886-
{
887-
auto & column_node = join_using_node->as<ColumnNode &>();
888-
join_using_column_name_to_column_node.emplace(column_node.getColumnName(), std::static_pointer_cast<ColumnNode>(join_using_node));
889-
}
890-
}
891-
892926
auto check_nested_column_not_in_using = [&join_using_column_name_to_column_node, &identifier_lookup](const QueryTreeNodePtr & node)
893927
{
894928
/** tldr: When an identifier is resolved into the function `nested` or `getSubcolumn`, and
@@ -950,13 +984,45 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I
950984
std::optional<JoinTableSide> resolved_side;
951985
QueryTreeNodePtr resolved_identifier;
952986

953-
JoinKind join_kind = from_join_node.getKind();
987+
auto convert_resolved_result_type_if_needed = [](
988+
const QueryTreeNodePtr & resolved_identifier_candidate,
989+
const std::unordered_map<std::string, ColumnNodePtr> & using_column_name_to_column_node,
990+
QueryTreeNodePtr & resolve_result,
991+
IdentifierResolveScope & current_scope,
992+
std::unordered_map<QueryTreeNodePtr, ProjectionName> & projection_name_mapping)
993+
{
994+
auto & resolved_column = resolved_identifier_candidate->as<ColumnNode &>();
995+
auto using_column_node_it = using_column_name_to_column_node.find(resolved_column.getColumnName());
996+
if (using_column_node_it != using_column_name_to_column_node.end() &&
997+
!using_column_node_it->second->getColumnType()->equals(*resolved_column.getColumnType()))
998+
{
999+
// std::cerr << "... fixing type for " << resolved_column.dumpTree() << std::endl;
1000+
auto resolved_column_clone = std::static_pointer_cast<ColumnNode>(resolved_column.clone());
1001+
resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
1002+
1003+
auto projection_name_it = projection_name_mapping.find(resolved_identifier_candidate);
1004+
if (projection_name_it != projection_name_mapping.end())
1005+
{
1006+
projection_name_mapping[resolved_column_clone] = projection_name_it->second;
1007+
// std::cerr << ".. upd name " << projection_name_it->second << " for col " << resolved_column_clone->dumpTree() << std::endl;
1008+
}
1009+
1010+
resolve_result = std::move(resolved_column_clone);
1011+
1012+
if (!resolve_result->isEqual(*using_column_node_it->second))
1013+
current_scope.join_columns_with_changed_types[resolve_result] = using_column_node_it->second;
1014+
}
1015+
};
9541016

9551017
/// If columns from left or right table were missed Object(Nullable('json')) subcolumns, they will be replaced
9561018
/// to ConstantNode(NULL), which can't be cast to ColumnNode, so we resolve it here.
9571019
if (auto missed_subcolumn_identifier = checkIsMissedObjectJSONSubcolumn(left_resolved_identifier, right_resolved_identifier))
9581020
return { .resolved_identifier = missed_subcolumn_identifier, .resolve_place = IdentifierResolvePlace::JOIN_TREE };
9591021

1022+
1023+
// for (const auto & [k, v] : join_using_column_name_to_column_node)
1024+
// std::cerr << k << " -> " << v->dumpTree() << std::endl;
1025+
9601026
if (left_resolved_identifier && right_resolved_identifier)
9611027
{
9621028
auto using_column_node_it = join_using_column_name_to_column_node.end();
@@ -1036,18 +1102,7 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I
10361102
}
10371103
else
10381104
{
1039-
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
1040-
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
1041-
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
1042-
!using_column_node_it->second->getColumnType()->equals(*left_resolved_column.getColumnType()))
1043-
{
1044-
auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
1045-
left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
1046-
resolved_identifier = std::move(left_resolved_column_clone);
1047-
1048-
if (!resolved_identifier->isEqual(*using_column_node_it->second))
1049-
scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second;
1050-
}
1105+
convert_resolved_result_type_if_needed(left_resolved_identifier, join_using_column_name_to_column_node, resolved_identifier, scope, node_to_projection_name);
10511106
}
10521107
}
10531108
else if (right_resolved_identifier)
@@ -1061,17 +1116,7 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I
10611116
}
10621117
else
10631118
{
1064-
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
1065-
auto using_column_node_it = join_using_column_name_to_column_node.find(right_resolved_column.getColumnName());
1066-
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
1067-
!using_column_node_it->second->getColumnType()->equals(*right_resolved_column.getColumnType()))
1068-
{
1069-
auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
1070-
right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
1071-
resolved_identifier = std::move(right_resolved_column_clone);
1072-
if (!resolved_identifier->isEqual(*using_column_node_it->second))
1073-
scope.join_columns_with_changed_types[resolved_identifier] = using_column_node_it->second;
1074-
}
1119+
convert_resolved_result_type_if_needed(right_resolved_identifier, join_using_column_name_to_column_node, resolved_identifier, scope, node_to_projection_name);
10751120
}
10761121
}
10771122

@@ -1087,10 +1132,12 @@ IdentifierResolveResult IdentifierResolver::tryResolveIdentifierFromJoin(const I
10871132
auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope);
10881133
if (nullable_resolved_identifier)
10891134
{
1135+
// std::cerr << ".. convert to null " << nullable_resolved_identifier->dumpTree() << std::endl;
10901136
resolved_identifier = nullable_resolved_identifier;
10911137
/// Set the same projection name for new nullable node
10921138
if (projection_name_it != node_to_projection_name.end())
10931139
{
1140+
// std::cerr << "... upd name for null " << projection_name_it->second << " -> " << resolved_identifier->dumpTree() << std::endl;
10941141
node_to_projection_name.emplace(resolved_identifier, projection_name_it->second);
10951142
}
10961143
}

src/Analyzer/Resolve/IdentifierResolver.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ class IdentifierResolver
4848
const IdentifierLookup & identifier_lookup,
4949
const IdentifierResolveScope & scope);
5050

51+
static bool tryBindIdentifierToJoinUsingColumn(
52+
const IdentifierLookup & identifier_lookup,
53+
const IdentifierResolveScope & scope);
54+
5155
static bool tryBindIdentifierToTableExpression(
5256
const IdentifierLookup & identifier_lookup,
5357
const QueryTreeNodePtr & table_expression_node,
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
analyzer=1, join with dictionary
2+
┌──uid─┬─name─┬─gid─┬─gname──┐
3+
1. │ 1231 │ John │ 1 │ Group1 │
4+
└──────┴──────┴─────┴────────┘
5+
6+
analyzer=1, join with table
7+
┌──uid─┬─name─┬─gid─┬─gname──┐
8+
1. │ 1231 │ John │ 1 │ Group1 │
9+
└──────┴──────┴─────┴────────┘
10+
11+
analyzer=0, join with dictionary
12+
┌──uid─┬─name─┬─gid─┬─gname──┐
13+
1. │ 1231 │ John │ 1 │ Group1 │
14+
└──────┴──────┴─────┴────────┘
15+
16+
analyzer=0, join with table
17+
┌──uid─┬─name─┬─gid─┬─gname──┐
18+
1. │ 1231 │ John │ 1 │ Group1 │
19+
└──────┴──────┴─────┴────────┘
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
DROP DATABASE IF EXISTS db_for_dict_03149;
2+
CREATE DATABASE db_for_dict_03149;
3+
4+
CREATE TABLE db_for_dict_03149.users (uid Int16, name String, gid LowCardinality(String), gname LowCardinality(String))
5+
ENGINE=MergeTree order by tuple();
6+
CREATE TABLE db_for_dict_03149.groups (gid LowCardinality(String), gname LowCardinality(String))
7+
ENGINE=MergeTree order by tuple();
8+
9+
CREATE DICTIONARY db_for_dict_03149.groups_dict (
10+
gid String, gname String
11+
)
12+
PRIMARY KEY gid, gname
13+
LAYOUT(COMPLEX_KEY_HASHED())
14+
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() QUERY 'select * from db_for_dict_03149.groups'))
15+
LIFETIME(MIN 0 MAX 0);
16+
17+
18+
INSERT INTO db_for_dict_03149.groups VALUES ('1', 'Group1');
19+
20+
INSERT INTO db_for_dict_03149.users VALUES (1231, 'John', '1', 'Group1');
21+
22+
select 'analyzer=1, join with dictionary';
23+
24+
SELECT u.uid, u.name, u.gid, u.gname
25+
FROM db_for_dict_03149.users u left join db_for_dict_03149.groups_dict g using gid, gname
26+
format PrettyCompactMonoBlock;
27+
28+
select '';
29+
select 'analyzer=1, join with table';
30+
31+
SELECT u.uid, u.name, u.gid, u.gname
32+
FROM db_for_dict_03149.users u left join db_for_dict_03149.groups g using gid, gname
33+
format PrettyCompactMonoBlock;
34+
35+
36+
set allow_experimental_analyzer=0;
37+
38+
select '';
39+
select 'analyzer=0, join with dictionary';
40+
41+
SELECT u.uid, u.name, u.gid, u.gname
42+
FROM db_for_dict_03149.users u left join db_for_dict_03149.groups_dict g using gid, gname
43+
format PrettyCompactMonoBlock;
44+
45+
select '';
46+
select 'analyzer=0, join with table';
47+
SELECT u.uid, u.name, u.gid, u.gname
48+
FROM db_for_dict_03149.users u left join db_for_dict_03149.groups g using gid, gname
49+
format PrettyCompactMonoBlock;
50+
51+
DROP DATABASE IF EXISTS db_for_dict_03149;
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
┌──uid─┬─name───┬─gid─┬─gname──┐
2+
1. │ 1231 │ John │ 1 │ Group1 │
3+
2. │ 1234 │ Test │ 2 │ Group1 │
4+
3. │ 6666 │ Ksenia │ 1 │ Group1 │
5+
4. │ 8888 │ Alice │ 1 │ Group1 │
6+
└──────┴────────┴─────┴────────┘
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
DROP DATABASE IF EXISTS db_for_dict_03149;
2+
CREATE DATABASE db_for_dict_03149;
3+
4+
CREATE TABLE db_for_dict_03149.users (uid Int16, name String, gid LowCardinality(String), gname LowCardinality(String))
5+
ENGINE=MergeTree order by tuple();
6+
CREATE TABLE db_for_dict_03149.groups (gid LowCardinality(String), gname LowCardinality(String))
7+
ENGINE=MergeTree order by tuple();
8+
9+
CREATE TABLE db_for_dict_03149.target (uid Int16, name String, gid LowCardinality(String), gname LowCardinality(String))
10+
ENGINE=MergeTree order by tuple();
11+
12+
CREATE DICTIONARY db_for_dict_03149.groups_dict (
13+
gid String, gname String
14+
)
15+
PRIMARY KEY gid, gname
16+
LAYOUT(COMPLEX_KEY_HASHED())
17+
SOURCE(CLICKHOUSE(QUERY 'select * from db_for_dict_03149.groups'))
18+
LIFETIME(MIN 0 MAX 0);
19+
20+
CREATE MATERIALIZED VIEW db_for_dict_03149.mv to db_for_dict_03149.target AS
21+
SELECT u.uid, u.name, u.gid, u.gname
22+
FROM db_for_dict_03149.users u left join db_for_dict_03149.groups_dict g using gid, gname;
23+
24+
INSERT INTO db_for_dict_03149.groups VALUES ('1', 'Group1');
25+
26+
INSERT INTO db_for_dict_03149.users VALUES (1231, 'John', '1', 'Group1');
27+
INSERT INTO db_for_dict_03149.users VALUES (6666, 'Ksenia', '1', 'Group1');
28+
INSERT INTO db_for_dict_03149.users VALUES (8888, 'Alice', '1', 'Group1');
29+
INSERT INTO db_for_dict_03149.users VALUES (1234, 'Test', '2', 'Group1');
30+
31+
SELECT * FROM db_for_dict_03149.target ORDER BY uid format PrettyCompactMonoBlock;
32+
33+
DROP DATABASE IF EXISTS db_for_dict_03149;

tests/queries/0_stateless/03408_cte_self_reference.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
-- Tags: no-tsan
12
SET enable_analyzer = 1;
23

34
WITH `cte1` AS (

0 commit comments

Comments
 (0)