Skip to content

Commit ee90b1a

Browse files
Merge pull request ClickHouse#92739 from ClickHouse/backport/25.8/92726
Backport ClickHouse#92726 to 25.8: Fix incorrect granule pruning in `KeyCondition` for the `match` function
2 parents c2643eb + cdd5d2c commit ee90b1a

File tree

3 files changed

+144
-0
lines changed

3 files changed

+144
-0
lines changed

src/Storages/MergeTree/KeyCondition.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3300,6 +3300,16 @@ BoolMask KeyCondition::checkInHyperrectangle(
33003300
rpn_stack.back().can_be_true = mayExistOnBloomFilter(*element.bloom_filter_data, column_index_to_column_bf);
33013301
}
33023302

3303+
/// If the condition is relaxed, the `can_be_false` branch is no longer reliable; it may have false negatives.
3304+
/// If `element.range` is relaxed (and thus wider) and contains `key_range`, then `can_be_false` becomes false.
3305+
/// However, in reality `can_be_false` may be true, because the actual range of the element may be stricter (narrower) than `element.range`.
3306+
/// For example, for `match(...)`, a false negative here (i.e. `can_be_false` is false) would make
3307+
/// `not match(...)` set `can_be_true = false`, causing us to skip the granule, which would be incorrect.
3308+
/// Therefore, we must set `can_be_false = true` to be safe.
3309+
/// Additionally, when `KeyCondition::isRelaxed()` is true, the caller should ignore `can_be_false` anyway.
3310+
if (element.relaxed)
3311+
rpn_stack.back().can_be_false = true;
3312+
33033313
if (element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
33043314
rpn_stack.back() = !rpn_stack.back();
33053315
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
-- { echo }
2+
SELECT count(*)
3+
FROM 03772_table_match
4+
WHERE NOT match(url, '^https?://clickhouse[.]com/');
5+
1
6+
EXPLAIN indexes = 1
7+
SELECT count(*)
8+
FROM 03772_table_match
9+
WHERE NOT match(url, '^https?://clickhouse[.]com/');
10+
Expression ((Project names + Projection))
11+
Aggregating
12+
Expression (Before GROUP BY)
13+
Filter ((WHERE + Change column names to column identifiers))
14+
ReadFromMergeTree (default.03772_table_match)
15+
Indexes:
16+
PrimaryKey
17+
Keys:
18+
url
19+
Condition: not((url in [\'http\', \'httq\')))
20+
Parts: 1/1
21+
Granules: 1/1
22+
Search Algorithm: generic exclusion search
23+
Ranges: 1
24+
SELECT count(*)
25+
FROM 03772_table_match
26+
WHERE NOT match(url, '^abcd');
27+
1
28+
EXPLAIN indexes = 1
29+
SELECT count(*)
30+
FROM 03772_table_match
31+
WHERE NOT match(url, '^abcd');
32+
Expression ((Project names + Projection))
33+
Aggregating
34+
Expression (Before GROUP BY)
35+
Filter ((WHERE + Change column names to column identifiers))
36+
ReadFromMergeTree (default.03772_table_match)
37+
Indexes:
38+
PrimaryKey
39+
Keys:
40+
url
41+
Condition: not((url in [\'abcd\', \'abce\')))
42+
Parts: 1/1
43+
Granules: 1/1
44+
Search Algorithm: generic exclusion search
45+
Ranges: 1
46+
SELECT count(*)
47+
FROM 03772_table_match
48+
WHERE match(url, '^abcd');
49+
0
50+
EXPLAIN indexes = 1
51+
SELECT count(*)
52+
FROM 03772_table_match
53+
WHERE match(url, '^abcd');
54+
Expression ((Project names + Projection))
55+
Aggregating
56+
Expression (Before GROUP BY)
57+
Filter ((WHERE + Change column names to column identifiers))
58+
ReadFromMergeTree (default.03772_table_match)
59+
Indexes:
60+
PrimaryKey
61+
Keys:
62+
url
63+
Condition: (url in [\'abcd\', \'abce\'))
64+
Parts: 0/1
65+
Granules: 0/1
66+
Search Algorithm: binary search
67+
Ranges: 0
68+
SELECT count(*)
69+
FROM 03772_table_match
70+
WHERE match(url, '^https?://clickhouse[.]com/') = false;
71+
1
72+
EXPLAIN indexes = 1
73+
SELECT count(*)
74+
FROM 03772_table_match
75+
WHERE match(url, '^https?://clickhouse[.]com/') = false;
76+
Expression ((Project names + Projection))
77+
Aggregating
78+
Expression (Before GROUP BY)
79+
Filter ((WHERE + Change column names to column identifiers))
80+
ReadFromMergeTree (default.03772_table_match)
81+
Indexes:
82+
PrimaryKey
83+
Condition: true
84+
Parts: 1/1
85+
Granules: 1/1
86+
Ranges: 1
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
-- Tags: no-replicated-database, no-parallel-replicas
2+
-- no-replicated-database: EXPLAIN output differs for replicated database.
3+
-- no-parallel-replicas: EXPLAIN output differs for parallel replicas.
4+
5+
6+
DROP TABLE IF EXISTS 03772_table_match;
7+
8+
CREATE TABLE 03772_table_match
9+
ENGINE = MergeTree()
10+
ORDER BY url AS
11+
SELECT 'http://example1.com/' AS url;
12+
13+
-- { echo }
14+
SELECT count(*)
15+
FROM 03772_table_match
16+
WHERE NOT match(url, '^https?://clickhouse[.]com/');
17+
18+
EXPLAIN indexes = 1
19+
SELECT count(*)
20+
FROM 03772_table_match
21+
WHERE NOT match(url, '^https?://clickhouse[.]com/');
22+
23+
SELECT count(*)
24+
FROM 03772_table_match
25+
WHERE NOT match(url, '^abcd');
26+
27+
EXPLAIN indexes = 1
28+
SELECT count(*)
29+
FROM 03772_table_match
30+
WHERE NOT match(url, '^abcd');
31+
32+
SELECT count(*)
33+
FROM 03772_table_match
34+
WHERE match(url, '^abcd');
35+
36+
EXPLAIN indexes = 1
37+
SELECT count(*)
38+
FROM 03772_table_match
39+
WHERE match(url, '^abcd');
40+
41+
SELECT count(*)
42+
FROM 03772_table_match
43+
WHERE match(url, '^https?://clickhouse[.]com/') = false;
44+
45+
EXPLAIN indexes = 1
46+
SELECT count(*)
47+
FROM 03772_table_match
48+
WHERE match(url, '^https?://clickhouse[.]com/') = false;

0 commit comments

Comments
 (0)