Skip to content

Commit 646cdad

Browse files
Backport ClickHouse#88350 to 25.8: count returns wrong answer
1 parent 0a29f83 commit 646cdad

File tree

5 files changed

+103
-35
lines changed

5 files changed

+103
-35
lines changed

src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -183,40 +183,6 @@ void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr & query_tree_node,
183183
if (query_node->isDistinct())
184184
continue;
185185
}
186-
else
187-
{
188-
auto * union_node = query_or_union_node->as<UnionNode>();
189-
chassert(union_node != nullptr);
190-
191-
/// We can't remove unused projections in the case of EXCEPT and INTERSECT
192-
/// because it can lead to incorrect query results. Example:
193-
///
194-
/// SELECT count()
195-
/// FROM
196-
/// (
197-
/// SELECT
198-
/// 1 AS a,
199-
/// 2 AS b
200-
/// INTERSECT ALL
201-
/// SELECT
202-
/// 1,
203-
/// 1
204-
/// )
205-
///
206-
/// Will be transformed into the following query with output 1 instead of 0:
207-
///
208-
/// SELECT count()
209-
/// FROM
210-
/// (
211-
/// SELECT
212-
/// 1 AS a, -- we must keep at least 1 column
213-
/// INTERSECT ALL
214-
/// SELECT
215-
/// 1
216-
/// );
217-
if (union_node->getUnionMode() > SelectUnionMode::UNION_DISTINCT)
218-
continue;
219-
}
220186

221187
auto used_projection_indexes = convertUsedColumnNamesToUsedProjectionIndexes(query_or_union_node, used_columns);
222188
updateUsedProjectionIndexes(query_or_union_node, used_projection_indexes);

src/Analyzer/UnionNode.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,35 @@ void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<size_t> &
122122
if (recursive_cte_table)
123123
return;
124124

125+
/// We can't remove unused projections in the case of EXCEPT and INTERSECT
126+
/// because it can lead to incorrect query results. Example:
127+
///
128+
/// SELECT count()
129+
/// FROM
130+
/// (
131+
/// SELECT
132+
/// 1 AS a,
133+
/// 2 AS b
134+
/// INTERSECT ALL
135+
/// SELECT
136+
/// 1,
137+
/// 1
138+
/// )
139+
///
140+
/// With unused projection columns removed, it would become the following query, which returns 1 instead of 0:
141+
///
142+
/// SELECT count()
143+
/// FROM
144+
/// (
145+
/// SELECT
146+
/// 1 AS a, -- we must keep at least 1 column
147+
/// INTERSECT ALL
148+
/// SELECT
149+
/// 1
150+
/// );
151+
if (union_mode > SelectUnionMode::UNION_DISTINCT)
152+
return;
153+
125154
auto & query_nodes = getQueries().getNodes();
126155
for (auto & query_node : query_nodes)
127156
{

src/Parsers/SelectUnionMode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ namespace DB
77
{
88
enum class SelectUnionMode : uint8_t
99
{
10-
UNION_DEFAULT,
10+
UNION_DEFAULT = 0,
1111
UNION_ALL,
1212
UNION_DISTINCT,
1313
EXCEPT_DEFAULT,
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
run enable-analyzer=1
2+
0 5
3+
1 1
4+
2 1
5+
run enable-analyzer=1 ignore
6+
0 5
7+
1 1
8+
2 1
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
CREATE TABLE left (g UInt32, i UInt32)
2+
ORDER BY (g, i);
3+
4+
INSERT INTO left VALUES
5+
(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (2, 0);
6+
7+
CREATE TABLE right (g UInt32, i UInt32)
8+
ORDER BY (g, i);
9+
10+
INSERT INTO right VALUES
11+
(0,0), (0, 3), (0, 4), (0, 6), (1, 0);
12+
13+
SET enable_analyzer = 1;
14+
15+
SELECT 'run enable-analyzer=1';
16+
with differences as
17+
(
18+
(
19+
select g, i from left
20+
where g BETWEEN 0 and 10
21+
EXCEPT ALL
22+
select g, i from right
23+
where g BETWEEN 0 and 10
24+
)
25+
UNION ALL
26+
(
27+
select g, i from right
28+
where g BETWEEN 0 and 10
29+
EXCEPT ALL
30+
select g, i from left
31+
where g BETWEEN 0 and 10
32+
)
33+
),
34+
diff_counts as
35+
(
36+
select g, count(*) from differences group by g
37+
)
38+
select * from diff_counts
39+
ORDER BY g;
40+
41+
SELECT 'run enable-analyzer=1 ignore';
42+
with differences as
43+
(
44+
(
45+
select g, i from left
46+
where g BETWEEN 0 and 10
47+
EXCEPT ALL
48+
select g, i from right
49+
where g BETWEEN 0 and 10
50+
)
51+
UNION ALL
52+
(
53+
select g, i from right
54+
where g BETWEEN 0 and 10
55+
EXCEPT ALL
56+
select g, i from left
57+
where g BETWEEN 0 and 10
58+
)
59+
),
60+
diff_counts as
61+
(
62+
select g, count(ignore(*)) from differences group by g
63+
)
64+
select * from diff_counts
65+
ORDER BY g;

0 commit comments

Comments
 (0)