Skip to content

Commit 50f1e34

Browse files
authored
Merge pull request #1122 from Altinity/backport/25.3.8/79969
25.3.8 Backport of ClickHouse#79969 - Fix match(col, '^…') index analysis with escaped metacharacters to avoid wrong results and crashes
2 parents f36ec92 + 922bc38 commit 50f1e34

File tree

3 files changed

+169
-3
lines changed

3 files changed

+169
-3
lines changed

src/Storages/MergeTree/KeyCondition.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ static String extractFixedPrefixFromRegularExpression(const String & regexp)
126126
const char * pos = begin;
127127
const char * end = regexp.data() + regexp.size();
128128

129-
while (pos != end)
129+
while (pos < end)
130130
{
131131
switch (*pos)
132132
{
@@ -149,19 +149,22 @@ static String extractFixedPrefixFromRegularExpression(const String & regexp)
149149
case '$':
150150
case '.':
151151
case '[':
152+
case ']':
152153
case '?':
153154
case '*':
154155
case '+':
156+
case '\\':
155157
case '{':
158+
case '}':
159+
case '-':
156160
fixed_prefix += *pos;
161+
++pos;
157162
break;
158163
default:
159164
/// all other escape sequences are not supported
160165
pos = end;
161-
break;
162166
}
163167

164-
++pos;
165168
break;
166169
}
167170

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
Condition: (path in [\'xxx(zzz\', \'xxx(zz{\'))
2+
1
3+
Condition: (path in [\'xxx)zzz\', \'xxx)zz{\'))
4+
1
5+
Condition: (path in [\'xxx^zzz\', \'xxx^zz{\'))
6+
1
7+
Condition: (path in [\'xxx$zzz\', \'xxx$zz{\'))
8+
1
9+
Condition: (path in [\'xxx.zzz\', \'xxx.zz{\'))
10+
1
11+
Condition: (path in [\'xxx[zzz\', \'xxx[zz{\'))
12+
1
13+
Condition: (path in [\'xxx]zzz\', \'xxx]zz{\'))
14+
1
15+
Condition: (path in [\'xxx?zzz\', \'xxx?zz{\'))
16+
1
17+
Condition: (path in [\'xxx*zzz\', \'xxx*zz{\'))
18+
1
19+
Condition: (path in [\'xxx+zzz\', \'xxx+zz{\'))
20+
1
21+
Condition: (path in [\'xxx\\\\zzz\', \'xxx\\\\zz{\'))
22+
1
23+
Condition: (path in [\'xxx{zzz\', \'xxx{zz{\'))
24+
1
25+
Condition: (path in [\'xxx}zzz\', \'xxx}zz{\'))
26+
1
27+
Condition: (path in [\'xxx-zzz\', \'xxx-zz{\'))
28+
1
29+
Condition: (path in [\'xxx\', \'xxy\'))
30+
0
31+
Condition: (path in [\'xxx\', \'xxy\'))
32+
0
33+
Condition: (path in [\'xxx\', \'xxy\'))
34+
0
35+
Condition: (path in [\'xxx\', \'xxy\'))
36+
0
37+
Condition: (path in [\'xxx\', \'xxy\'))
38+
0
39+
Condition: (path in [\'xxx\', \'xxy\'))
40+
0
41+
Condition: (path in [\'xxx\', \'xxy\'))
42+
15
43+
Condition: (path in [\'xxx\', \'xxy\'))
44+
15
45+
Condition: (path in [\'xxx\', \'xxy\'))
46+
15
47+
Condition: (path in [\'xxx\', \'xxy\'))
48+
0
49+
Condition: (path in [\'xxx\', \'xxy\'))
50+
0
51+
Condition: true
52+
Condition: true
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
SET parallel_replicas_local_plan=1;
2+
3+
drop table if exists foo;
4+
5+
CREATE TABLE foo (id UInt8, path String) engine = MergeTree ORDER BY (path) SETTINGS index_granularity=1;
6+
7+
INSERT INTO foo VALUES (1, 'xxx|yyy'),
8+
(2, 'xxx(zzz'),
9+
(3, 'xxx)zzz'),
10+
(4, 'xxx^zzz'),
11+
(5, 'xxx$zzz'),
12+
(6, 'xxx.zzz'),
13+
(7, 'xxx[zzz'),
14+
(8, 'xxx]zzz'),
15+
(9, 'xxx?zzz'),
16+
(10, 'xxx*zzz'),
17+
(11, 'xxx+zzz'),
18+
(12, 'xxx\\zzz'),
19+
(13, 'xxx{zzz'),
20+
(14, 'xxx}zzz'),
21+
(15, 'xxx-zzz');
22+
23+
24+
-- check that escaped sequences are also properly extracted
25+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\(zzz')) WHERE explain like '%Condition%';
26+
SELECT count() FROM foo WHERE match(path, '^xxx\\(zzz') SETTINGS force_primary_key = 1;
27+
28+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\)zzz')) WHERE explain like '%Condition%';
29+
SELECT count() FROM foo WHERE match(path, '^xxx\\)zzz') SETTINGS force_primary_key = 1;
30+
31+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\^zzz')) WHERE explain like '%Condition%';
32+
SELECT count() FROM foo WHERE match(path, '^xxx\\^zzz') SETTINGS force_primary_key = 1;
33+
34+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\$zzz')) WHERE explain like '%Condition%';
35+
SELECT count() FROM foo WHERE match(path, '^xxx\\$zzz') SETTINGS force_primary_key = 1;
36+
37+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\.zzz')) WHERE explain like '%Condition%';
38+
SELECT count() FROM foo WHERE match(path, '^xxx\\.zzz') SETTINGS force_primary_key = 1;
39+
40+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\[zzz')) WHERE explain like '%Condition%';
41+
SELECT count() FROM foo WHERE match(path, '^xxx\\[zzz') SETTINGS force_primary_key = 1;
42+
43+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\]zzz')) WHERE explain like '%Condition%';
44+
SELECT count() FROM foo WHERE match(path, '^xxx\\]zzz') SETTINGS force_primary_key = 1;
45+
46+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\?zzz')) WHERE explain like '%Condition%';
47+
SELECT count() FROM foo WHERE match(path, '^xxx\\?zzz') SETTINGS force_primary_key = 1;
48+
49+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\*zzz')) WHERE explain like '%Condition%';
50+
SELECT count() FROM foo WHERE match(path, '^xxx\\*zzz') SETTINGS force_primary_key = 1;
51+
52+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\+zzz')) WHERE explain like '%Condition%';
53+
SELECT count() FROM foo WHERE match(path, '^xxx\\+zzz') SETTINGS force_primary_key = 1;
54+
55+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\\\zzz')) WHERE explain like '%Condition%';
56+
SELECT count() FROM foo WHERE match(path, '^xxx\\\\zzz') SETTINGS force_primary_key = 1;
57+
58+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\{zzz')) WHERE explain like '%Condition%';
59+
SELECT count() FROM foo WHERE match(path, '^xxx\\{zzz') SETTINGS force_primary_key = 1;
60+
61+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\}zzz')) WHERE explain like '%Condition%';
62+
SELECT count() FROM foo WHERE match(path, '^xxx\\}zzz') SETTINGS force_primary_key = 1;
63+
64+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\-zzz')) WHERE explain like '%Condition%';
65+
SELECT count() FROM foo WHERE match(path, '^xxx\\-zzz') SETTINGS force_primary_key = 1;
66+
67+
68+
-- these regex chars limit index use (only the first 3 chars are used during the index scan)
69+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\0bla')) WHERE explain like '%Condition%';
70+
SELECT count() FROM foo WHERE match(path, '^xxx\0bla') SETTINGS force_primary_key = 1;
71+
72+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx(bla)')) WHERE explain like '%Condition%';
73+
SELECT count() FROM foo WHERE match(path, '^xxx(bla)') SETTINGS force_primary_key = 1;
74+
75+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx[bla]')) WHERE explain like '%Condition%';
76+
SELECT count() FROM foo WHERE match(path, '^xxx[bla]') SETTINGS force_primary_key = 1;
77+
78+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx^bla')) WHERE explain like '%Condition%';
79+
SELECT count() FROM foo WHERE match(path, '^xxx^bla') SETTINGS force_primary_key = 1;
80+
81+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx.bla')) WHERE explain like '%Condition%';
82+
SELECT count() FROM foo WHERE match(path, '^xxx.bla') SETTINGS force_primary_key = 1;
83+
84+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx+bla')) WHERE explain like '%Condition%';
85+
SELECT count() FROM foo WHERE match(path, '^xxx+bla') SETTINGS force_primary_key = 1;
86+
87+
88+
-- here the fourth char is not used during the index scan, because it has a 0+ quantifier
89+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx{0,1}')) WHERE explain like '%Condition%';
90+
SELECT count() FROM foo WHERE match(path, '^xxxx{0,1}') SETTINGS force_primary_key = 1;
91+
92+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx?')) WHERE explain like '%Condition%';
93+
SELECT count() FROM foo WHERE match(path, '^xxxx?') SETTINGS force_primary_key = 1;
94+
95+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx*')) WHERE explain like '%Condition%';
96+
SELECT count() FROM foo WHERE match(path, '^xxxx*') SETTINGS force_primary_key = 1;
97+
98+
-- some unsupported regex escapes - only the first 3 chars are used during the index scan
99+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\d+')) WHERE explain like '%Condition%';
100+
SELECT count() FROM foo WHERE match(path, '^xxx\d+') SETTINGS force_primary_key = 1;
101+
102+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\w+')) WHERE explain like '%Condition%';
103+
SELECT count() FROM foo WHERE match(path, '^xxx\w+') SETTINGS force_primary_key = 1;
104+
105+
106+
-- fully disabled for pipes - see https://github.com/ClickHouse/ClickHouse/pull/54696
107+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxx\\|zzz')) WHERE explain like '%Condition%';
108+
SELECT count() FROM foo WHERE match(path, '^xxx\\|zzz') SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED }
109+
110+
SELECT trimLeft(explain) FROM (EXPLAIN PLAN indexes=1 SELECT id FROM foo WHERE match(path, '^xxxx|foo')) WHERE explain like '%Condition%';
111+
SELECT count() FROM foo WHERE match(path, '^xxxx|foo') SETTINGS force_primary_key = 1; -- { serverError INDEX_NOT_USED }

0 commit comments

Comments
 (0)