Skip to content

Commit f5c388d

Browse files
authored
Fix regexp case insensitive flag (#531)
Ref: bellard/quickjs@af30861
1 parent ac958f1 commit f5c388d

File tree

6 files changed

+516
-253
lines changed

6 files changed

+516
-253
lines changed

libregexp.c

Lines changed: 1 addition & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@
3434
/*
3535
TODO:
3636
37-
- Add full unicode canonicalize rules for character ranges (not
38-
really useful but needed for exact "ignorecase" compatibility).
39-
4037
- Add a lock step execution mode (=linear time execution guaranteed)
4138
when the regular expression is "simple" i.e. no backreference nor
4239
complicated lookahead. The opcodes are designed for this execution
@@ -123,33 +120,6 @@ static int dbuf_insert(DynBuf *s, int pos, int len)
123120
return 0;
124121
}
125122

126-
/* canonicalize with the specific JS regexp rules */
127-
static uint32_t lre_canonicalize(uint32_t c, BOOL is_unicode)
128-
{
129-
uint32_t res[LRE_CC_RES_LEN_MAX];
130-
int len;
131-
if (is_unicode) {
132-
if (likely(c < 128)) {
133-
if (c >= 'A' && c <= 'Z')
134-
c = c - 'A' + 'a';
135-
} else {
136-
lre_case_conv(res, c, 2);
137-
c = res[0];
138-
}
139-
} else {
140-
if (likely(c < 128)) {
141-
if (c >= 'a' && c <= 'z')
142-
c = c - 'a' + 'A';
143-
} else {
144-
/* legacy regexp: to upper case if single char >= 128 */
145-
len = lre_case_conv(res, c, FALSE);
146-
if (len == 1 && res[0] >= 128)
147-
c = res[0];
148-
}
149-
}
150-
return c;
151-
}
152-
153123
static const uint16_t char_range_d[] = {
154124
1,
155125
0x0030, 0x0039 + 1,
@@ -248,31 +218,6 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
248218
return -1;
249219
}
250220

251-
static int cr_canonicalize(CharRange *cr)
252-
{
253-
CharRange a;
254-
uint32_t pt[2];
255-
int i, ret;
256-
257-
cr_init(&a, cr->mem_opaque, lre_realloc);
258-
pt[0] = 'a';
259-
pt[1] = 'z' + 1;
260-
ret = cr_op(&a, cr->points, cr->len, pt, 2, CR_OP_INTER);
261-
if (ret)
262-
goto fail;
263-
/* convert to upper case */
264-
/* XXX: the generic unicode case would be much more complicated
265-
and not really useful */
266-
for(i = 0; i < a.len; i++) {
267-
a.points[i] += 'A' - 'a';
268-
}
269-
/* Note: for simplicity we keep the lower case ranges */
270-
ret = cr_union1(cr, a.points, a.len);
271-
fail:
272-
cr_free(&a);
273-
return ret;
274-
}
275-
276221
#ifdef DUMP_REOP
277222
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
278223
int buf_len)
@@ -955,7 +900,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
955900
}
956901
}
957902
if (s->ignore_case) {
958-
if (cr_canonicalize(cr))
903+
if (cr_regexp_canonicalize(cr, s->is_unicode))
959904
goto memory_error;
960905
}
961906
if (invert) {

libunicode-table.h

Lines changed: 61 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3777,72 +3777,70 @@ static const uint8_t unicode_prop_Changes_When_Titlecased1_table[22] = {
37773777
0x8b, 0x80, 0x8e, 0x80, 0xae, 0x80,
37783778
};
37793779

3780-
static const uint8_t unicode_prop_Changes_When_Casefolded1_table[33] = {
3781-
0x40, 0xde, 0x80, 0xcf, 0x80, 0x97, 0x80, 0x44,
3782-
0x3c, 0x80, 0x59, 0x11, 0x80, 0x40, 0xe4, 0x3f,
3783-
0x3f, 0x87, 0x89, 0x11, 0x05, 0x02, 0x11, 0x80,
3784-
0xa9, 0x11, 0x80, 0x60, 0xdb, 0x07, 0x86, 0x8b,
3785-
0x84,
3780+
static const uint8_t unicode_prop_Changes_When_Casefolded1_table[29] = {
3781+
0x41, 0xef, 0x80, 0x41, 0x9e, 0x80, 0x9e, 0x80,
3782+
0x5a, 0xe4, 0x83, 0x40, 0xb5, 0x00, 0x00, 0x00,
3783+
0x80, 0xde, 0x06, 0x06, 0x80, 0x8a, 0x09, 0x81,
3784+
0x89, 0x10, 0x81, 0x8d, 0x80,
37863785
};
37873786

3788-
static const uint8_t unicode_prop_Changes_When_NFKC_Casefolded1_table[451] = {
3787+
static const uint8_t unicode_prop_Changes_When_NFKC_Casefolded1_table[447] = {
37893788
0x40, 0x9f, 0x06, 0x00, 0x01, 0x00, 0x01, 0x12,
3790-
0x10, 0x82, 0x9f, 0x80, 0xcf, 0x01, 0x80, 0x8b,
3791-
0x07, 0x80, 0xfb, 0x01, 0x01, 0x80, 0xa5, 0x80,
3792-
0x40, 0xbb, 0x88, 0x9e, 0x29, 0x84, 0xda, 0x08,
3793-
0x81, 0x89, 0x80, 0xa3, 0x04, 0x02, 0x04, 0x08,
3794-
0x80, 0xc9, 0x82, 0x9c, 0x80, 0x41, 0x93, 0x80,
3795-
0x40, 0x93, 0x80, 0xd7, 0x83, 0x42, 0xde, 0x87,
3796-
0xfb, 0x08, 0x80, 0xd2, 0x01, 0x80, 0xa1, 0x11,
3797-
0x80, 0x40, 0xfc, 0x81, 0x42, 0xd4, 0x80, 0xfe,
3798-
0x80, 0xa7, 0x81, 0xad, 0x80, 0xb5, 0x80, 0x88,
3799-
0x03, 0x03, 0x03, 0x80, 0x8b, 0x80, 0x88, 0x00,
3800-
0x26, 0x80, 0x90, 0x80, 0x88, 0x03, 0x03, 0x03,
3801-
0x80, 0x8b, 0x80, 0x41, 0x41, 0x80, 0xe1, 0x81,
3802-
0x46, 0x52, 0x81, 0xd4, 0x84, 0x45, 0x1b, 0x10,
3803-
0x8a, 0x80, 0x91, 0x80, 0x9b, 0x8c, 0x80, 0xa1,
3804-
0xa4, 0x40, 0xd9, 0x80, 0x40, 0xd5, 0x00, 0x00,
3805-
0x00, 0x00, 0x00, 0x00, 0x01, 0x3f, 0x3f, 0x87,
3806-
0x89, 0x11, 0x04, 0x00, 0x29, 0x04, 0x12, 0x80,
3807-
0x88, 0x12, 0x80, 0x88, 0x11, 0x11, 0x04, 0x08,
3808-
0x8f, 0x00, 0x20, 0x8b, 0x12, 0x2a, 0x08, 0x0b,
3809-
0x00, 0x07, 0x82, 0x8c, 0x06, 0x92, 0x81, 0x9a,
3810-
0x80, 0x8c, 0x8a, 0x80, 0xd6, 0x18, 0x10, 0x8a,
3811-
0x01, 0x0c, 0x0a, 0x00, 0x10, 0x11, 0x02, 0x06,
3812-
0x05, 0x1c, 0x85, 0x8f, 0x8f, 0x8f, 0x88, 0x80,
3813-
0x40, 0xa1, 0x08, 0x81, 0x40, 0xf7, 0x81, 0x41,
3814-
0x34, 0xd5, 0x99, 0x9a, 0x45, 0x20, 0x80, 0xe6,
3815-
0x82, 0xe4, 0x80, 0x41, 0x9e, 0x81, 0x40, 0xf0,
3816-
0x80, 0x41, 0x2e, 0x80, 0xd2, 0x80, 0x8b, 0x40,
3817-
0xd5, 0xa9, 0x80, 0xb4, 0x00, 0x82, 0xdf, 0x09,
3818-
0x80, 0xde, 0x80, 0xb0, 0xdd, 0x82, 0x8d, 0xdf,
3819-
0x9e, 0x80, 0xa7, 0x87, 0xae, 0x80, 0x41, 0x7f,
3820-
0x60, 0x72, 0x9b, 0x81, 0x40, 0xd1, 0x80, 0x40,
3821-
0x80, 0x12, 0x81, 0x43, 0x61, 0x83, 0x88, 0x80,
3822-
0x60, 0x4d, 0x95, 0x41, 0x0d, 0x08, 0x00, 0x81,
3823-
0x89, 0x00, 0x00, 0x09, 0x82, 0xc3, 0x81, 0xe9,
3824-
0xa5, 0x86, 0x8b, 0x24, 0x00, 0x97, 0x04, 0x00,
3825-
0x01, 0x01, 0x80, 0xeb, 0xa0, 0x41, 0x6a, 0x91,
3826-
0xbf, 0x81, 0xb5, 0xa7, 0x8c, 0x82, 0x99, 0x95,
3827-
0x94, 0x81, 0x8b, 0x80, 0x92, 0x03, 0x1a, 0x00,
3828-
0x80, 0x40, 0x86, 0x08, 0x80, 0x9f, 0x99, 0x40,
3829-
0x83, 0x15, 0x0d, 0x0d, 0x0a, 0x16, 0x06, 0x80,
3830-
0x88, 0x47, 0x87, 0x20, 0xa9, 0x80, 0x88, 0x60,
3831-
0xb4, 0xe4, 0x83, 0x54, 0xb9, 0x86, 0x8d, 0x87,
3832-
0xbf, 0x85, 0x42, 0x3e, 0xd4, 0x80, 0xc6, 0x01,
3833-
0x08, 0x09, 0x0b, 0x80, 0x8b, 0x00, 0x06, 0x80,
3834-
0xc0, 0x03, 0x0f, 0x06, 0x80, 0x9b, 0x03, 0x04,
3835-
0x00, 0x16, 0x80, 0x41, 0x53, 0x81, 0x41, 0x23,
3836-
0x81, 0xb1, 0x48, 0x2f, 0xbd, 0x4d, 0x91, 0x18,
3837-
0x9a, 0x01, 0x00, 0x08, 0x80, 0x89, 0x03, 0x00,
3838-
0x00, 0x28, 0x18, 0x00, 0x00, 0x02, 0x01, 0x00,
3839-
0x08, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x0b,
3840-
0x06, 0x03, 0x03, 0x00, 0x80, 0x89, 0x80, 0x90,
3841-
0x22, 0x04, 0x80, 0x90, 0x42, 0x43, 0x8a, 0x84,
3842-
0x9e, 0x80, 0x9f, 0x99, 0x82, 0xa2, 0x80, 0xee,
3843-
0x82, 0x8c, 0xab, 0x83, 0x88, 0x31, 0x49, 0x9d,
3844-
0x89, 0x60, 0xfc, 0x05, 0x42, 0x1d, 0x6b, 0x05,
3845-
0xe1, 0x4f, 0xff,
3789+
0x10, 0x82, 0xf3, 0x80, 0x8b, 0x80, 0x40, 0x84,
3790+
0x01, 0x01, 0x80, 0xa2, 0x01, 0x80, 0x40, 0xbb,
3791+
0x88, 0x9e, 0x29, 0x84, 0xda, 0x08, 0x81, 0x89,
3792+
0x80, 0xa3, 0x04, 0x02, 0x04, 0x08, 0x07, 0x80,
3793+
0x9e, 0x80, 0xa0, 0x82, 0x9c, 0x80, 0x42, 0x28,
3794+
0x80, 0xd7, 0x83, 0x42, 0xde, 0x87, 0xfb, 0x08,
3795+
0x80, 0xd2, 0x01, 0x80, 0xa1, 0x11, 0x80, 0x40,
3796+
0xfc, 0x81, 0x42, 0xd4, 0x80, 0xfe, 0x80, 0xa7,
3797+
0x81, 0xad, 0x80, 0xb5, 0x80, 0x88, 0x03, 0x03,
3798+
0x03, 0x80, 0x8b, 0x80, 0x88, 0x00, 0x26, 0x80,
3799+
0x90, 0x80, 0x88, 0x03, 0x03, 0x03, 0x80, 0x8b,
3800+
0x80, 0x41, 0x41, 0x80, 0xe1, 0x81, 0x46, 0x52,
3801+
0x81, 0xd4, 0x84, 0x45, 0x1b, 0x10, 0x8a, 0x80,
3802+
0x91, 0x80, 0x9b, 0x8c, 0x80, 0xa1, 0xa4, 0x40,
3803+
0xd5, 0x83, 0x40, 0xb5, 0x00, 0x00, 0x00, 0x80,
3804+
0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
3805+
0xb7, 0x05, 0x00, 0x13, 0x05, 0x11, 0x02, 0x0c,
3806+
0x11, 0x00, 0x00, 0x0c, 0x15, 0x05, 0x08, 0x8f,
3807+
0x00, 0x20, 0x8b, 0x12, 0x2a, 0x08, 0x0b, 0x00,
3808+
0x07, 0x82, 0x8c, 0x06, 0x92, 0x81, 0x9a, 0x80,
3809+
0x8c, 0x8a, 0x80, 0xd6, 0x18, 0x10, 0x8a, 0x01,
3810+
0x0c, 0x0a, 0x00, 0x10, 0x11, 0x02, 0x06, 0x05,
3811+
0x1c, 0x85, 0x8f, 0x8f, 0x8f, 0x88, 0x80, 0x40,
3812+
0xa1, 0x08, 0x81, 0x40, 0xf7, 0x81, 0x41, 0x34,
3813+
0xd5, 0x99, 0x9a, 0x45, 0x20, 0x80, 0xe6, 0x82,
3814+
0xe4, 0x80, 0x41, 0x9e, 0x81, 0x40, 0xf0, 0x80,
3815+
0x41, 0x2e, 0x80, 0xd2, 0x80, 0x8b, 0x40, 0xd5,
3816+
0xa9, 0x80, 0xb4, 0x00, 0x82, 0xdf, 0x09, 0x80,
3817+
0xde, 0x80, 0xb0, 0xdd, 0x82, 0x8d, 0xdf, 0x9e,
3818+
0x80, 0xa7, 0x87, 0xae, 0x80, 0x41, 0x7f, 0x60,
3819+
0x72, 0x9b, 0x81, 0x40, 0xd1, 0x80, 0x40, 0x80,
3820+
0x12, 0x81, 0x43, 0x61, 0x83, 0x88, 0x80, 0x60,
3821+
0x4d, 0x95, 0x41, 0x0d, 0x08, 0x00, 0x81, 0x89,
3822+
0x00, 0x00, 0x09, 0x82, 0xc3, 0x81, 0xe9, 0xc2,
3823+
0x00, 0x97, 0x04, 0x00, 0x01, 0x01, 0x80, 0xeb,
3824+
0xa0, 0x41, 0x6a, 0x91, 0xbf, 0x81, 0xb5, 0xa7,
3825+
0x8c, 0x82, 0x99, 0x95, 0x94, 0x81, 0x8b, 0x80,
3826+
0x92, 0x03, 0x1a, 0x00, 0x80, 0x40, 0x86, 0x08,
3827+
0x80, 0x9f, 0x99, 0x40, 0x83, 0x15, 0x0d, 0x0d,
3828+
0x0a, 0x16, 0x06, 0x80, 0x88, 0x47, 0x87, 0x20,
3829+
0xa9, 0x80, 0x88, 0x60, 0xb4, 0xe4, 0x83, 0x54,
3830+
0xb9, 0x86, 0x8d, 0x87, 0xbf, 0x85, 0x42, 0x3e,
3831+
0xd4, 0x80, 0xc6, 0x01, 0x08, 0x09, 0x0b, 0x80,
3832+
0x8b, 0x00, 0x06, 0x80, 0xc0, 0x03, 0x0f, 0x06,
3833+
0x80, 0x9b, 0x03, 0x04, 0x00, 0x16, 0x80, 0x41,
3834+
0x53, 0x81, 0x41, 0x23, 0x81, 0xb1, 0x48, 0x2f,
3835+
0xbd, 0x4d, 0x91, 0x18, 0x9a, 0x01, 0x00, 0x08,
3836+
0x80, 0x89, 0x03, 0x00, 0x00, 0x28, 0x18, 0x00,
3837+
0x00, 0x02, 0x01, 0x00, 0x08, 0x00, 0x00, 0x00,
3838+
0x00, 0x01, 0x00, 0x0b, 0x06, 0x03, 0x03, 0x00,
3839+
0x80, 0x89, 0x80, 0x90, 0x22, 0x04, 0x80, 0x90,
3840+
0x42, 0x43, 0x8a, 0x84, 0x9e, 0x80, 0x9f, 0x99,
3841+
0x82, 0xa2, 0x80, 0xee, 0x82, 0x8c, 0xab, 0x83,
3842+
0x88, 0x31, 0x49, 0x9d, 0x89, 0x60, 0xfc, 0x05,
3843+
0x42, 0x1d, 0x6b, 0x05, 0xe1, 0x4f, 0xff,
38463844
};
38473845

38483846
static const uint8_t unicode_prop_ASCII_Hex_Digit_table[5] = {

0 commit comments

Comments
 (0)