Skip to content

Commit 2fb5eb3

Browse files
committed
extmod/re1.5: Check and report byte overflow errors in _compilecode.
The generated regex bytecode can only encode jump offsets and counts that fit in a single signed byte. This commit checks every place where such a value can overflow given a suitable input regex, and returns an error when it does. The change assumes that a value which overflows an int8_t still fits in a platform int. Closes: #7078 Signed-off-by: Jeff Epler <[email protected]> # Conflicts: # extmod/re1.5/compilecode.c
1 parent 82479b6 commit 2fb5eb3

File tree

3 files changed

+61
-12
lines changed

3 files changed

+61
-12
lines changed

extmod/re1.5/compilecode.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num)
99
#define REL(at, to) (to - at - 2)
1010
#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
11+
#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
1112
#define PC (prog->bytelen)
1213

13-
1414
static char unescape(char c) {
1515
switch (c) {
1616
case 'a':
@@ -33,9 +33,17 @@ static char unescape(char c) {
3333
}
3434

3535

36+
// Write val into code[at] and flag values that do not survive a round trip
// through int8_t.
//   at   - index into code at which to store the value
//   code - output buffer; when NULL nothing is written (the range check
//          still runs)
//   val  - value to store
//   err  - set to true when val != (int8_t)val; never cleared here
static void _emit_checked(int at, char *code, int val, bool *err) {
37+
// |= keeps *err set once any earlier call has flagged an overflow.
// NOTE(review): relies on the out-of-range int -> int8_t conversion
// producing a different value; C leaves that conversion
// implementation-defined - confirm for the target compilers.
*err |= val != (int8_t)val;
38+
if (code) {
39+
// The store is performed even when the range check above failed.
code[at] = val;
40+
}
41+
}
42+
3643
static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
3744
{
3845
char *code = sizecode ? NULL : prog->insts;
46+
bool err = false;
3947
int start = PC;
4048
int term = PC;
4149
int alt_label = 0;
@@ -96,7 +104,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
96104
EMIT(PC++, *re);
97105
}
98106
}
99-
EMIT(term + 1, cnt);
107+
EMIT_CHECKED(term + 1, cnt);
100108
break;
101109
}
102110
case '(': {
@@ -107,7 +115,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
107115
if (capture) {
108116
sub = ++prog->sub;
109117
EMIT(PC++, Save);
110-
EMIT(PC++, 2 * sub);
118+
EMIT_CHECKED(PC++, 2 * sub);
111119
prog->len++;
112120
} else {
113121
re += 2;
@@ -118,7 +126,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
118126

119127
if (capture) {
120128
EMIT(PC++, Save);
121-
EMIT(PC++, 2 * sub + 1);
129+
EMIT_CHECKED(PC++, 2 * sub + 1);
122130
prog->len++;
123131
}
124132

@@ -133,23 +141,23 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
133141
} else {
134142
EMIT(term, Split);
135143
}
136-
EMIT(term + 1, REL(term, PC));
144+
EMIT_CHECKED(term + 1, REL(term, PC));
137145
prog->len++;
138146
term = PC;
139147
break;
140148
case '*':
141149
if (PC == term) return NULL; // nothing to repeat
142150
INSERT_CODE(term, 2, PC);
143151
EMIT(PC, Jmp);
144-
EMIT(PC + 1, REL(PC, term));
152+
EMIT_CHECKED(PC + 1, REL(PC, term));
145153
PC += 2;
146154
if (re[1] == '?') {
147155
EMIT(term, RSplit);
148156
re++;
149157
} else {
150158
EMIT(term, Split);
151159
}
152-
EMIT(term + 1, REL(term, PC));
160+
EMIT_CHECKED(term + 1, REL(term, PC));
153161
prog->len += 2;
154162
term = PC;
155163
break;
@@ -161,20 +169,20 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
161169
} else {
162170
EMIT(PC, RSplit);
163171
}
164-
EMIT(PC + 1, REL(PC, term));
172+
EMIT_CHECKED(PC + 1, REL(PC, term));
165173
PC += 2;
166174
prog->len++;
167175
term = PC;
168176
break;
169177
case '|':
170178
if (alt_label) {
171-
EMIT(alt_label, REL(alt_label, PC) + 1);
179+
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
172180
}
173181
INSERT_CODE(start, 2, PC);
174182
EMIT(PC++, Jmp);
175183
alt_label = PC++;
176184
EMIT(start, Split);
177-
EMIT(start + 1, REL(start, PC));
185+
EMIT_CHECKED(start + 1, REL(start, PC));
178186
prog->len += 2;
179187
term = PC;
180188
break;
@@ -192,9 +200,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
192200
}
193201

194202
if (alt_label) {
195-
EMIT(alt_label, REL(alt_label, PC) + 1);
203+
EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
196204
}
197-
return re;
205+
return err ? NULL : re;
198206
}
199207

200208
int re1_5_sizecode(const char *re)

tests/extmod/ure_limit.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Test overflow in ure.compile output code.
2+
3+
# Import the regex module under the name "re"; if it is not available,
# print "SKIP" and exit instead of failing.
# NOTE(review): "SKIP" is presumably the marker the test runner looks for - confirm.
try:
4+
import ure as re
5+
except ImportError:
6+
print("SKIP")
7+
raise SystemExit
8+
9+
10+
# Try to compile the pattern r. Prints "Error" if re.compile raises;
# prints nothing when compilation succeeds. The compiled object is discarded.
def test_re(r):
11+
try:
12+
re.compile(r)
13+
# NOTE(review): the bare except catches every exception type, not only
# regex errors - consider narrowing if the exact error type matters.
except:
14+
print("Error")
15+
16+
17+
# Each call below passes a pattern that is expected to be rejected: the
# expected-output file lists one "Error" line per call (seven in total).

# too many chars in []
18+
test_re("[" + "a" * 256 + "]")
19+
20+
# too many groups
21+
test_re("(a)" * 256)
22+
23+
# jump too big for ?
24+
test_re("(" + "a" * 62 + ")?")
25+
26+
# jump too big for * (two pattern lengths are exercised)
27+
test_re("(" + "a" * 60 + ".)*")
28+
test_re("(" + "a" * 60 + "..)*")
29+
30+
# jump too big for +
31+
test_re("(" + "a" * 62 + ")+")
32+
33+
# jump too big for |
34+
test_re("b" * 63 + "|a")

tests/extmod/ure_limit.py.exp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Error
2+
Error
3+
Error
4+
Error
5+
Error
6+
Error
7+
Error

0 commit comments

Comments
 (0)