Skip to content

Commit 5abbeac

Browse files
authored
Fix bug in GET_PREV_CHAR macro (#278)
* Fix bug in `GET_PREV_CHAR` macro
  - pass the `cbuf_type` variable to the `XXX_CHAR` macros in `lre_exec_backtrack()`
  - improve the readability of these macros
  - fix the `GET_PREV_CHAR` macro: `cptr` was decremented twice on an invalid high surrogate (i.e. when a low surrogate was not preceded by a high surrogate)
  - minimize non-functional changes
1 parent d11f5f6 commit 5abbeac

File tree

1 file changed

+38
-38
lines changed

1 file changed

+38
-38
lines changed

libregexp.c

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,86 +1964,86 @@ static BOOL is_word_char(uint32_t c)
19641964
(c == '_'));
19651965
}
19661966

1967-
#define GET_CHAR(c, cptr, cbuf_end) \
1967+
#define GET_CHAR(c, cptr, cbuf_end, cbuf_type) \
19681968
do { \
19691969
if (cbuf_type == 0) { \
19701970
c = *cptr++; \
19711971
} else { \
1972-
const uint16_t *_p = (uint16_t *)cptr; \
1973-
const uint16_t *_end = (uint16_t *)cbuf_end; \
1972+
const uint16_t *_p = (const uint16_t *)cptr; \
1973+
const uint16_t *_end = (const uint16_t *)cbuf_end; \
19741974
c = *_p++; \
19751975
if (is_hi_surrogate(c)) \
19761976
if (cbuf_type == 2) \
19771977
if (_p < _end) \
19781978
if (is_lo_surrogate(*_p)) \
19791979
c = from_surrogate(c, *_p++); \
1980-
cptr = (void *) _p; \
1980+
cptr = (const void *)_p; \
19811981
} \
19821982
} while (0)
19831983

1984-
#define PEEK_CHAR(c, cptr, cbuf_end) \
1984+
#define PEEK_CHAR(c, cptr, cbuf_end, cbuf_type) \
19851985
do { \
19861986
if (cbuf_type == 0) { \
19871987
c = cptr[0]; \
19881988
} else { \
1989-
const uint16_t *_p = (uint16_t *)cptr; \
1990-
const uint16_t *_end = (uint16_t *)cbuf_end; \
1989+
const uint16_t *_p = (const uint16_t *)cptr; \
1990+
const uint16_t *_end = (const uint16_t *)cbuf_end; \
19911991
c = *_p++; \
19921992
if (is_hi_surrogate(c)) \
19931993
if (cbuf_type == 2) \
19941994
if (_p < _end) \
19951995
if (is_lo_surrogate(*_p)) \
1996-
c = from_surrogate(c, *_p++); \
1996+
c = from_surrogate(c, *_p); \
19971997
} \
19981998
} while (0)
19991999

2000-
#define PEEK_PREV_CHAR(c, cptr, cbuf_start) \
2000+
#define PEEK_PREV_CHAR(c, cptr, cbuf_start, cbuf_type) \
20012001
do { \
20022002
if (cbuf_type == 0) { \
20032003
c = cptr[-1]; \
20042004
} else { \
2005-
const uint16_t *_p = (uint16_t *)cptr - 1; \
2006-
const uint16_t *_start = (uint16_t *)cbuf_start; \
2005+
const uint16_t *_p = (const uint16_t *)cptr - 1; \
2006+
const uint16_t *_start = (const uint16_t *)cbuf_start; \
20072007
c = *_p; \
20082008
if (is_lo_surrogate(c)) \
20092009
if (cbuf_type == 2) \
20102010
if (_p > _start) \
2011-
if (is_hi_surrogate(*--_p)) \
2012-
c = from_surrogate(*_p, c); \
2011+
if (is_hi_surrogate(_p[-1])) \
2012+
c = from_surrogate(*--_p, c); \
20132013
} \
20142014
} while (0)
20152015

2016-
#define GET_PREV_CHAR(c, cptr, cbuf_start) \
2016+
#define GET_PREV_CHAR(c, cptr, cbuf_start, cbuf_type) \
20172017
do { \
20182018
if (cbuf_type == 0) { \
20192019
cptr--; \
20202020
c = cptr[0]; \
20212021
} else { \
2022-
const uint16_t *_p = (uint16_t *)cptr - 1; \
2023-
const uint16_t *_start = (uint16_t *)cbuf_start; \
2022+
const uint16_t *_p = (const uint16_t *)cptr - 1; \
2023+
const uint16_t *_start = (const uint16_t *)cbuf_start; \
20242024
c = *_p; \
20252025
if (is_lo_surrogate(c)) \
20262026
if (cbuf_type == 2) \
20272027
if (_p > _start) \
2028-
if (is_hi_surrogate(*--_p)) \
2029-
c = from_surrogate(*_p, c); \
2030-
cptr = (void *) _p; \
2028+
if (is_hi_surrogate(_p[-1])) \
2029+
c = from_surrogate(*--_p, c); \
2030+
cptr = (const void *)_p; \
20312031
} \
20322032
} while (0)
20332033

2034-
#define PREV_CHAR(cptr, cbuf_start) \
2034+
#define PREV_CHAR(cptr, cbuf_start, cbuf_type) \
20352035
do { \
20362036
if (cbuf_type == 0) { \
20372037
cptr--; \
20382038
} else { \
2039-
const uint16_t *_p = (uint16_t *)cptr - 1; \
2040-
const uint16_t *_start = (uint16_t *)cbuf_start; \
2039+
const uint16_t *_p = (const uint16_t *)cptr - 1; \
2040+
const uint16_t *_start = (const uint16_t *)cbuf_start; \
20412041
if (is_lo_surrogate(*_p)) \
20422042
if (cbuf_type == 2) \
20432043
if (_p > _start) \
20442044
if (is_hi_surrogate(_p[-1])) \
20452045
_p--; \
2046-
cptr = (void *) _p; \
2046+
cptr = (const void *)_p; \
20472047
} \
20482048
} while (0)
20492049

@@ -2183,7 +2183,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
21832183
/* go backward */
21842184
char_count = get_u32(pc + 12);
21852185
for(i = 0; i < char_count; i++) {
2186-
PREV_CHAR(cptr, s->cbuf);
2186+
PREV_CHAR(cptr, s->cbuf, cbuf_type);
21872187
}
21882188
pc = (pc + 16) + (int)get_u32(pc);
21892189
rs->cptr = cptr;
@@ -2222,7 +2222,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22222222
test_char:
22232223
if (cptr >= cbuf_end)
22242224
goto no_match;
2225-
GET_CHAR(c, cptr, cbuf_end);
2225+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
22262226
if (s->ignore_case) {
22272227
c = lre_canonicalize(c, s->is_unicode);
22282228
}
@@ -2269,7 +2269,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22692269
break;
22702270
if (!s->multi_line)
22712271
goto no_match;
2272-
PEEK_PREV_CHAR(c, cptr, s->cbuf);
2272+
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
22732273
if (!is_line_terminator(c))
22742274
goto no_match;
22752275
break;
@@ -2278,21 +2278,21 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
22782278
break;
22792279
if (!s->multi_line)
22802280
goto no_match;
2281-
PEEK_CHAR(c, cptr, cbuf_end);
2281+
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
22822282
if (!is_line_terminator(c))
22832283
goto no_match;
22842284
break;
22852285
case REOP_dot:
22862286
if (cptr == cbuf_end)
22872287
goto no_match;
2288-
GET_CHAR(c, cptr, cbuf_end);
2288+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
22892289
if (is_line_terminator(c))
22902290
goto no_match;
22912291
break;
22922292
case REOP_any:
22932293
if (cptr == cbuf_end)
22942294
goto no_match;
2295-
GET_CHAR(c, cptr, cbuf_end);
2295+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
22962296
break;
22972297
case REOP_save_start:
22982298
case REOP_save_end:
@@ -2346,14 +2346,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
23462346
if (cptr == s->cbuf) {
23472347
v1 = FALSE;
23482348
} else {
2349-
PEEK_PREV_CHAR(c, cptr, s->cbuf);
2349+
PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type);
23502350
v1 = is_word_char(c);
23512351
}
23522352
/* current char */
23532353
if (cptr >= cbuf_end) {
23542354
v2 = FALSE;
23552355
} else {
2356-
PEEK_CHAR(c, cptr, cbuf_end);
2356+
PEEK_CHAR(c, cptr, cbuf_end, cbuf_type);
23572357
v2 = is_word_char(c);
23582358
}
23592359
if (v1 ^ v2 ^ (REOP_not_word_boundary - opcode))
@@ -2378,8 +2378,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
23782378
while (cptr1 < cptr1_end) {
23792379
if (cptr >= cbuf_end)
23802380
goto no_match;
2381-
GET_CHAR(c1, cptr1, cptr1_end);
2382-
GET_CHAR(c2, cptr, cbuf_end);
2381+
GET_CHAR(c1, cptr1, cptr1_end, cbuf_type);
2382+
GET_CHAR(c2, cptr, cbuf_end, cbuf_type);
23832383
if (s->ignore_case) {
23842384
c1 = lre_canonicalize(c1, s->is_unicode);
23852385
c2 = lre_canonicalize(c2, s->is_unicode);
@@ -2392,8 +2392,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
23922392
while (cptr1 > cptr1_start) {
23932393
if (cptr == s->cbuf)
23942394
goto no_match;
2395-
GET_PREV_CHAR(c1, cptr1, cptr1_start);
2396-
GET_PREV_CHAR(c2, cptr, s->cbuf);
2395+
GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type);
2396+
GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type);
23972397
if (s->ignore_case) {
23982398
c1 = lre_canonicalize(c1, s->is_unicode);
23992399
c2 = lre_canonicalize(c2, s->is_unicode);
@@ -2413,7 +2413,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
24132413
pc += 2;
24142414
if (cptr >= cbuf_end)
24152415
goto no_match;
2416-
GET_CHAR(c, cptr, cbuf_end);
2416+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
24172417
if (s->ignore_case) {
24182418
c = lre_canonicalize(c, s->is_unicode);
24192419
}
@@ -2453,7 +2453,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
24532453
pc += 2;
24542454
if (cptr >= cbuf_end)
24552455
goto no_match;
2456-
GET_CHAR(c, cptr, cbuf_end);
2456+
GET_CHAR(c, cptr, cbuf_end, cbuf_type);
24572457
if (s->ignore_case) {
24582458
c = lre_canonicalize(c, s->is_unicode);
24592459
}
@@ -2485,7 +2485,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
24852485
/* go to the previous char */
24862486
if (cptr == s->cbuf)
24872487
goto no_match;
2488-
PREV_CHAR(cptr, s->cbuf);
2488+
PREV_CHAR(cptr, s->cbuf, cbuf_type);
24892489
break;
24902490
case REOP_simple_greedy_quant:
24912491
{

0 commit comments

Comments
 (0)