Skip to content

Commit 0d2c59d

Browse files
authored
Remove PT_ANY support from matchers (#605)
1 parent 0212293 commit 0d2c59d

File tree

10 files changed

+102
-147
lines changed

10 files changed

+102
-147
lines changed

src/pcre2_auto_possess.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -127,21 +127,21 @@ opcode is used to select the column. The values are as follows:
127127
*/
128128

129129
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
130-
/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
131-
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
132-
{ 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */
133-
{ 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
134-
{ 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
135-
{ 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
136-
{ 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */
137-
{ 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */
138-
{ 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */
139-
{ 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */
140-
{ 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */
141-
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
142-
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
143-
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
144-
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */
130+
/* LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
131+
{ 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */
132+
{ 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
133+
{ 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
134+
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
135+
{ 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */
136+
{ 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */
137+
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */
138+
{ 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */
139+
{ 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */
140+
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
141+
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
142+
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
143+
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */
144+
/* PT_ANY does not need a record: it is defined last, with the same value as PT_TABSIZE, so it lies outside this table. */
145145
};
146146

147147
/* This table is used to check whether auto-possessification is possible

src/pcre2_compile.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8052,12 +8052,19 @@ for (;; pptr++)
80528052
pdata = 0;
80538053
}
80548054

8055-
/* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit
8056-
from the auto-anchoring code. */
8055+
/* The special case of \p{Any} is compiled to OP_ALLANY and \P{Any}
8056+
is compiled to [] so as to benefit from the auto-anchoring code. */
80578057

8058-
if (meta_arg == ESC_p && ptype == PT_ANY)
8058+
if (ptype == PT_ANY)
80598059
{
8060-
*code++ = OP_ALLANY;
8060+
if (meta_arg == ESC_P)
8061+
{
8062+
*code++ = OP_CLASS;
8063+
memset(code, 0, 32);
8064+
code += 32 / sizeof(PCRE2_UCHAR);
8065+
}
8066+
else
8067+
*code++ = OP_ALLANY;
80618068
}
80628069
else
80638070
{

src/pcre2_compile_class.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ while (TRUE)
408408
case ESC_p:
409409
case ESC_P:
410410
ptr++;
411-
if (meta_arg == ESC_p && *ptr == PT_ANY)
411+
if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
412412
{
413413
if (buffer != NULL)
414414
{

src/pcre2_dfa_match.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,10 +1173,6 @@ for (;;)
11731173
const ucd_record * prop = GET_UCD(c);
11741174
switch(code[1])
11751175
{
1176-
case PT_ANY:
1177-
OK = TRUE;
1178-
break;
1179-
11801176
case PT_LAMP:
11811177
chartype = prop->chartype;
11821178
OK = chartype == ucp_Lu || chartype == ucp_Ll ||
@@ -1456,10 +1452,6 @@ for (;;)
14561452
const ucd_record * prop = GET_UCD(c);
14571453
switch(code[2])
14581454
{
1459-
case PT_ANY:
1460-
OK = TRUE;
1461-
break;
1462-
14631455
case PT_LAMP:
14641456
chartype = prop->chartype;
14651457
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -1721,10 +1713,6 @@ for (;;)
17211713
const ucd_record * prop = GET_UCD(c);
17221714
switch(code[2])
17231715
{
1724-
case PT_ANY:
1725-
OK = TRUE;
1726-
break;
1727-
17281716
case PT_LAMP:
17291717
chartype = prop->chartype;
17301718
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -2011,10 +1999,6 @@ for (;;)
20111999
const ucd_record * prop = GET_UCD(c);
20122000
switch(code[1 + IMM2_SIZE + 1])
20132001
{
2014-
case PT_ANY:
2015-
OK = TRUE;
2016-
break;
2017-
20182002
case PT_LAMP:
20192003
chartype = prop->chartype;
20202004
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;

src/pcre2_internal.h

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,21 +1321,22 @@ only. */
13211321
changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
13221322
match. */
13231323

1324-
#define PT_ANY 0 /* Any property - matches all chars */
1325-
#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
1326-
#define PT_GC 2 /* Specified general characteristic (e.g. L) */
1327-
#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */
1328-
#define PT_SC 4 /* Script only (e.g. Han) */
1329-
#define PT_SCX 5 /* Script extensions (includes SC) */
1330-
#define PT_ALNUM 6 /* Alphanumeric - the union of L and N */
1331-
#define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */
1332-
#define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */
1333-
#define PT_WORD 9 /* Word - L, N, Mn, or Pc */
1334-
#define PT_CLIST 10 /* Pseudo-property: match character list */
1335-
#define PT_UCNC 11 /* Universal Character nameable character */
1336-
#define PT_BIDICL 12 /* Specified bidi class */
1337-
#define PT_BOOL 13 /* Boolean property */
1338-
#define PT_TABSIZE 14 /* Size of square table for autopossessify tests */
1324+
#define PT_LAMP 0 /* L& - the union of Lu, Ll, Lt */
1325+
#define PT_GC 1 /* Specified general characteristic (e.g. L) */
1326+
#define PT_PC 2 /* Specified particular characteristic (e.g. Lu) */
1327+
#define PT_SC 3 /* Script only (e.g. Han) */
1328+
#define PT_SCX 4 /* Script extensions (includes SC) */
1329+
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
1330+
#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */
1331+
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
1332+
#define PT_WORD 8 /* Word - L, N, Mn, or Pc */
1333+
#define PT_CLIST 9 /* Pseudo-property: match character list */
1334+
#define PT_UCNC 10 /* Universal Character nameable character */
1335+
#define PT_BIDICL 11 /* Specified bidi class */
1336+
#define PT_BOOL 12 /* Boolean property */
1337+
#define PT_ANY 13 /* Must be the last entry!
1338+
Any property - matches all chars */
1339+
#define PT_TABSIZE PT_ANY /* Size of square table for autopossessify tests */
13391340

13401341
/* The following special properties are used only in XCLASS items, when POSIX
13411342
classes are specified and PCRE2_UCP is set - in other words, for Unicode

src/pcre2_jit_char_inc.h

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -539,14 +539,6 @@ while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
539539

540540
switch(*cc)
541541
{
542-
case PT_ANY:
543-
/* Any either accepts everything or is ignored. */
544-
if (cc[-1] == XCL_PROP)
545-
items = UCPCAT_ALL;
546-
else
547-
compares--;
548-
break;
549-
550542
case PT_LAMP:
551543
items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
552544
break;
@@ -653,21 +645,7 @@ if (*cc != XCL_END)
653645
}
654646

655647
#ifdef SUPPORT_UNICODE
656-
if (compares == 0 && category_list == 0)
657-
{
658-
/* No or all characters are accepted. */
659-
if (status & XCLASS_IS_ECLASS)
660-
{
661-
if (list == backtracks)
662-
OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
663-
return;
664-
}
665-
666-
compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
667-
if (list != backtracks)
668-
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
669-
return;
670-
}
648+
SLJIT_ASSERT(compares > 0 || category_list != 0);
671649
#else /* !SUPPORT_UNICODE */
672650
SLJIT_ASSERT(compares > 0);
673651
#endif /* SUPPORT_UNICODE */
@@ -902,7 +880,6 @@ while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
902880
cc++;
903881
switch(*cc)
904882
{
905-
case PT_ANY:
906883
case PT_LAMP:
907884
case PT_GC:
908885
case PT_PC:

0 commit comments

Comments
 (0)