@@ -69,7 +69,7 @@ typedef struct {
69
69
const uint8_t * buf_end ;
70
70
const uint8_t * buf_start ;
71
71
int re_flags ;
72
- BOOL is_utf16 ;
72
+ BOOL is_unicode ;
73
73
BOOL ignore_case ;
74
74
BOOL dotall ;
75
75
int capture_count ;
@@ -122,11 +122,11 @@ static int dbuf_insert(DynBuf *s, int pos, int len)
122
122
}
123
123
124
124
/* canonicalize with the specific JS regexp rules */
125
- static uint32_t lre_canonicalize (uint32_t c , BOOL is_utf16 )
125
+ static uint32_t lre_canonicalize (uint32_t c , BOOL is_unicode )
126
126
{
127
127
uint32_t res [LRE_CC_RES_LEN_MAX ];
128
128
int len ;
129
- if (is_utf16 ) {
129
+ if (is_unicode ) {
130
130
if (likely (c < 128 )) {
131
131
if (c >= 'A' && c <= 'Z' )
132
132
c = c - 'A' + 'a' ;
@@ -751,10 +751,10 @@ static int get_class_atom(REParseState *s, CharRange *cr,
751
751
if ((c >= 'a' && c <= 'z' ) ||
752
752
(c >= 'A' && c <= 'Z' ) ||
753
753
(((c >= '0' && c <= '9' ) || c == '_' ) &&
754
- inclass && !s -> is_utf16 )) { /* Annex B.1.4 */
754
+ inclass && !s -> is_unicode )) { /* Annex B.1.4 */
755
755
c &= 0x1f ;
756
756
p ++ ;
757
- } else if (s -> is_utf16 ) {
757
+ } else if (s -> is_unicode ) {
758
758
goto invalid_escape ;
759
759
} else {
760
760
/* otherwise return '\' and 'c' */
@@ -764,7 +764,7 @@ static int get_class_atom(REParseState *s, CharRange *cr,
764
764
break ;
765
765
case 'p' :
766
766
case 'P' :
767
- if (s -> is_utf16 ) {
767
+ if (s -> is_unicode ) {
768
768
if (parse_unicode_property (s , cr , & p , (c == 'P' )))
769
769
return -1 ;
770
770
c = CLASS_RANGE_BASE ;
@@ -773,14 +773,14 @@ static int get_class_atom(REParseState *s, CharRange *cr,
773
773
/* fall thru */
774
774
default :
775
775
p -- ;
776
- ret = lre_parse_escape (& p , s -> is_utf16 * 2 );
776
+ ret = lre_parse_escape (& p , s -> is_unicode * 2 );
777
777
if (ret >= 0 ) {
778
778
c = ret ;
779
779
} else {
780
780
if (ret == -2 && * p != '\0' && strchr ("^$\\.*+?()[]{}|/" , * p )) {
781
781
/* always valid to escape these characters */
782
782
goto normal_char ;
783
- } else if (s -> is_utf16 ) {
783
+ } else if (s -> is_unicode ) {
784
784
invalid_escape :
785
785
return re_parse_error (s , "invalid escape sequence in regular expression" );
786
786
} else {
@@ -802,7 +802,7 @@ static int get_class_atom(REParseState *s, CharRange *cr,
802
802
/* normal char */
803
803
if (c >= 128 ) {
804
804
c = unicode_from_utf8 (p , UTF8_CHAR_LEN_MAX , & p );
805
- if ((unsigned )c > 0xffff && !s -> is_utf16 ) {
805
+ if ((unsigned )c > 0xffff && !s -> is_unicode ) {
806
806
/* XXX: should handle non BMP-1 code points */
807
807
return re_parse_error (s , "malformed unicode char" );
808
808
}
@@ -878,7 +878,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
878
878
if (* p == '-' && p [1 ] != ']' ) {
879
879
const uint8_t * p0 = p + 1 ;
880
880
if (c1 >= CLASS_RANGE_BASE ) {
881
- if (s -> is_utf16 ) {
881
+ if (s -> is_unicode ) {
882
882
cr_free (cr1 );
883
883
goto invalid_class_range ;
884
884
}
@@ -890,7 +890,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
890
890
goto fail ;
891
891
if (c2 >= CLASS_RANGE_BASE ) {
892
892
cr_free (cr1 );
893
- if (s -> is_utf16 ) {
893
+ if (s -> is_unicode ) {
894
894
goto invalid_class_range ;
895
895
}
896
896
/* Annex B: match '-' character */
@@ -1248,7 +1248,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1248
1248
re_emit_op (s , REOP_prev );
1249
1249
break ;
1250
1250
case '{' :
1251
- if (s -> is_utf16 ) {
1251
+ if (s -> is_unicode ) {
1252
1252
return re_parse_error (s , "syntax error" );
1253
1253
} else if (!is_digit (p [1 ])) {
1254
1254
/* Annex B: we accept '{' not followed by digits as a
@@ -1300,7 +1300,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1300
1300
lookahead :
1301
1301
/* Annex B allows lookahead to be used as an atom for
1302
1302
the quantifiers */
1303
- if (!s -> is_utf16 && !is_backward_lookahead ) {
1303
+ if (!s -> is_unicode && !is_backward_lookahead ) {
1304
1304
last_atom_start = s -> byte_code .size ;
1305
1305
last_capture_count = s -> capture_count ;
1306
1306
}
@@ -1376,15 +1376,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1376
1376
/* annex B: we tolerate invalid group names in non
1377
1377
unicode mode if there is no named capture
1378
1378
definition */
1379
- if (s -> is_utf16 || re_has_named_captures (s ))
1379
+ if (s -> is_unicode || re_has_named_captures (s ))
1380
1380
return re_parse_error (s , "expecting group name" );
1381
1381
else
1382
1382
goto parse_class_atom ;
1383
1383
}
1384
1384
p1 += 3 ;
1385
1385
if (re_parse_group_name (s -> u .tmp_buf , sizeof (s -> u .tmp_buf ),
1386
1386
& p1 )) {
1387
- if (s -> is_utf16 || re_has_named_captures (s ))
1387
+ if (s -> is_unicode || re_has_named_captures (s ))
1388
1388
return re_parse_error (s , "invalid group name" );
1389
1389
else
1390
1390
goto parse_class_atom ;
@@ -1395,7 +1395,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1395
1395
after (inefficient, but hopefully not common) */
1396
1396
c = re_parse_captures (s , & dummy_res , s -> u .tmp_buf );
1397
1397
if (c < 0 ) {
1398
- if (s -> is_utf16 || re_has_named_captures (s ))
1398
+ if (s -> is_unicode || re_has_named_captures (s ))
1399
1399
return re_parse_error (s , "group name not defined" );
1400
1400
else
1401
1401
goto parse_class_atom ;
@@ -1407,7 +1407,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1407
1407
case '0' :
1408
1408
p += 2 ;
1409
1409
c = 0 ;
1410
- if (s -> is_utf16 ) {
1410
+ if (s -> is_unicode ) {
1411
1411
if (is_digit (* p )) {
1412
1412
return re_parse_error (s , "invalid decimal escape in regular expression" );
1413
1413
}
@@ -1429,7 +1429,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1429
1429
1430
1430
c = parse_digits (& p , FALSE);
1431
1431
if (c < 0 || (c >= s -> capture_count && c >= re_count_captures (s ))) {
1432
- if (!s -> is_utf16 ) {
1432
+ if (!s -> is_unicode ) {
1433
1433
/* Annex B.1.4: accept legacy octal */
1434
1434
p = q ;
1435
1435
if (* p <= '7' ) {
@@ -1471,7 +1471,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1471
1471
break ;
1472
1472
case ']' :
1473
1473
case '}' :
1474
- if (s -> is_utf16 )
1474
+ if (s -> is_unicode )
1475
1475
return re_parse_error (s , "syntax error" );
1476
1476
goto parse_class_atom ;
1477
1477
default :
@@ -1493,7 +1493,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1493
1493
return -1 ;
1494
1494
} else {
1495
1495
if (s -> ignore_case )
1496
- c = lre_canonicalize (c , s -> is_utf16 );
1496
+ c = lre_canonicalize (c , s -> is_unicode );
1497
1497
if (c <= 0x7f )
1498
1498
re_emit_op_u8 (s , REOP_char8 , c );
1499
1499
else if (c <= 0xffff )
@@ -1531,7 +1531,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1531
1531
/* As an extension (see ES6 annex B), we accept '{' not
1532
1532
followed by digits as a normal atom */
1533
1533
if (!is_digit (p [1 ])) {
1534
- if (s -> is_utf16 )
1534
+ if (s -> is_unicode )
1535
1535
goto invalid_quant_count ;
1536
1536
break ;
1537
1537
}
@@ -1550,7 +1550,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
1550
1550
quant_max = INT32_MAX ; /* infinity */
1551
1551
}
1552
1552
}
1553
- if (* p != '}' && !s -> is_utf16 ) {
1553
+ if (* p != '}' && !s -> is_unicode ) {
1554
1554
/* Annex B: normal atom if invalid '{' syntax */
1555
1555
p = p1 ;
1556
1556
break ;
@@ -1839,7 +1839,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
1839
1839
s -> buf_end = s -> buf_ptr + buf_len ;
1840
1840
s -> buf_start = s -> buf_ptr ;
1841
1841
s -> re_flags = re_flags ;
1842
- s -> is_utf16 = ((re_flags & LRE_FLAG_UTF16 ) != 0 );
1842
+ s -> is_unicode = ((re_flags & LRE_FLAG_UNICODE ) != 0 );
1843
1843
is_sticky = ((re_flags & LRE_FLAG_STICKY ) != 0 );
1844
1844
s -> ignore_case = ((re_flags & LRE_FLAG_IGNORECASE ) != 0 );
1845
1845
s -> dotall = ((re_flags & LRE_FLAG_DOTALL ) != 0 );
@@ -2039,7 +2039,7 @@ typedef struct {
2039
2039
int stack_size_max ;
2040
2040
BOOL multi_line ;
2041
2041
BOOL ignore_case ;
2042
- BOOL is_utf16 ;
2042
+ BOOL is_unicode ;
2043
2043
void * opaque ; /* used for stack overflow check */
2044
2044
2045
2045
size_t state_size ;
@@ -2189,7 +2189,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2189
2189
goto no_match ;
2190
2190
GET_CHAR (c , cptr , cbuf_end );
2191
2191
if (s -> ignore_case ) {
2192
- c = lre_canonicalize (c , s -> is_utf16 );
2192
+ c = lre_canonicalize (c , s -> is_unicode );
2193
2193
}
2194
2194
if (val != c )
2195
2195
goto no_match ;
@@ -2346,8 +2346,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2346
2346
GET_CHAR (c1 , cptr1 , cptr1_end );
2347
2347
GET_CHAR (c2 , cptr , cbuf_end );
2348
2348
if (s -> ignore_case ) {
2349
- c1 = lre_canonicalize (c1 , s -> is_utf16 );
2350
- c2 = lre_canonicalize (c2 , s -> is_utf16 );
2349
+ c1 = lre_canonicalize (c1 , s -> is_unicode );
2350
+ c2 = lre_canonicalize (c2 , s -> is_unicode );
2351
2351
}
2352
2352
if (c1 != c2 )
2353
2353
goto no_match ;
@@ -2360,8 +2360,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2360
2360
GET_PREV_CHAR (c1 , cptr1 , cptr1_start );
2361
2361
GET_PREV_CHAR (c2 , cptr , s -> cbuf );
2362
2362
if (s -> ignore_case ) {
2363
- c1 = lre_canonicalize (c1 , s -> is_utf16 );
2364
- c2 = lre_canonicalize (c2 , s -> is_utf16 );
2363
+ c1 = lre_canonicalize (c1 , s -> is_unicode );
2364
+ c2 = lre_canonicalize (c2 , s -> is_unicode );
2365
2365
}
2366
2366
if (c1 != c2 )
2367
2367
goto no_match ;
@@ -2380,7 +2380,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2380
2380
goto no_match ;
2381
2381
GET_CHAR (c , cptr , cbuf_end );
2382
2382
if (s -> ignore_case ) {
2383
- c = lre_canonicalize (c , s -> is_utf16 );
2383
+ c = lre_canonicalize (c , s -> is_unicode );
2384
2384
}
2385
2385
idx_min = 0 ;
2386
2386
low = get_u16 (pc + 0 * 4 );
@@ -2420,7 +2420,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
2420
2420
goto no_match ;
2421
2421
GET_CHAR (c , cptr , cbuf_end );
2422
2422
if (s -> ignore_case ) {
2423
- c = lre_canonicalize (c , s -> is_utf16 );
2423
+ c = lre_canonicalize (c , s -> is_unicode );
2424
2424
}
2425
2425
idx_min = 0 ;
2426
2426
low = get_u32 (pc + 0 * 8 );
@@ -2512,13 +2512,13 @@ int lre_exec(uint8_t **capture,
2512
2512
re_flags = bc_buf [RE_HEADER_FLAGS ];
2513
2513
s -> multi_line = (re_flags & LRE_FLAG_MULTILINE ) != 0 ;
2514
2514
s -> ignore_case = (re_flags & LRE_FLAG_IGNORECASE ) != 0 ;
2515
- s -> is_utf16 = (re_flags & LRE_FLAG_UTF16 ) != 0 ;
2515
+ s -> is_unicode = (re_flags & LRE_FLAG_UNICODE ) != 0 ;
2516
2516
s -> capture_count = bc_buf [RE_HEADER_CAPTURE_COUNT ];
2517
2517
s -> stack_size_max = bc_buf [RE_HEADER_STACK_SIZE ];
2518
2518
s -> cbuf = cbuf ;
2519
2519
s -> cbuf_end = cbuf + (clen << cbuf_type );
2520
2520
s -> cbuf_type = cbuf_type ;
2521
- if (s -> cbuf_type == 1 && s -> is_utf16 )
2521
+ if (s -> cbuf_type == 1 && s -> is_unicode )
2522
2522
s -> cbuf_type = 2 ;
2523
2523
s -> opaque = opaque ;
2524
2524
0 commit comments