Skip to content

Commit f95b881

Browse files
authored
Optimize out unnecessary eclass bitmaps (#596)
1 parent 94a0118 commit f95b881

File tree

7 files changed

+342
-313
lines changed

7 files changed

+342
-313
lines changed

HACKING

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -666,9 +666,8 @@ inside an OP_ECLASS at match time. They are:
666666
ECL_ANY match all characters; no additional data
667667
ECL_NONE match no characters; no additional data
668668

669-
The meaning of the bitmap indicated by ECL_MAP is different to that of XCL_MAP
670-
for OP_XCLASS, in one way. The ECL_MAP bitmap is present whenever any code
671-
points < 256 match the class.
669+
The meaning of the bitmap indicated by ECL_MAP is the same as that of XCL_MAP for OP_XCLASS.
670+
If the bitmap is present, all code points < 256 are checked against the bitmap.
672671

673672

674673
Back references

src/pcre2_compile.c

Lines changed: 3 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -6190,198 +6190,9 @@ for (;; pptr++)
61906190

61916191
if ((*pptr & CLASS_IS_ECLASS) != 0)
61926192
{
6193-
eclass_op_info op_info;
6194-
PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
6195-
BOOL allbitsone = TRUE;
6196-
BOOL allbitszero = TRUE;
6197-
6198-
previous = code;
6199-
*code++ = OP_ECLASS;
6200-
code += LINK_SIZE;
6201-
*code++ = 0; /* Flags, currently zero. */
6202-
if (!PRIV(compile_class_nested)(options, xoptions, FALSE, &pptr, &code,
6203-
&op_info, errorcodeptr, cb, lengthptr))
6193+
if (!PRIV(compile_class_nested)(options, xoptions, &pptr, &code,
6194+
errorcodeptr, cb, lengthptr))
62046195
return 0;
6205-
6206-
if (lengthptr != NULL)
6207-
{
6208-
*lengthptr += code - previous;
6209-
code = previous;
6210-
/* (*lengthptr - previous_length) now holds the amount of buffer that
6211-
we require to make the call to compile_class_nested() with
6212-
lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
6213-
before that call. */
6214-
}
6215-
6216-
/* Do some useful counting of what's in the bitmap. */
6217-
for (int i = 0; i < 8; i++)
6218-
if (op_info.bits.classwords[i] != 0xffffffff)
6219-
{
6220-
allbitsone = FALSE;
6221-
break;
6222-
}
6223-
for (int i = 0; i < 8; i++)
6224-
if (op_info.bits.classwords[i] != 0)
6225-
{
6226-
allbitszero = FALSE;
6227-
break;
6228-
}
6229-
6230-
/* After constant-folding the extended class syntax, it may turn out to be
6231-
a simple class after all. In that case, we can unwrap it from the
6232-
OP_ECLASS container - and in fact, we must do so, because in 8-bit
6233-
no-Unicode mode the matcher is compiled without support for OP_ECLASS. */
6234-
6235-
#ifndef SUPPORT_WIDE_CHARS
6236-
PCRE2_ASSERT(op_info.op_single_type != 0);
6237-
#else
6238-
if (op_info.op_single_type != 0)
6239-
#endif
6240-
{
6241-
/* Rewind back over the OP_ECLASS. */
6242-
code = previous;
6243-
6244-
/* If the bits are all ones, and the "high characters" are all matched
6245-
too, we use a special-cased encoding of OP_ALLANY. */
6246-
6247-
if (op_info.op_single_type == ECL_ANY && allbitsone)
6248-
{
6249-
/* Advancing code means rewinding lengthptr, at this point. */
6250-
if (lengthptr != NULL) *lengthptr -= 1;
6251-
*code++ = OP_ALLANY;
6252-
}
6253-
6254-
/* If the high bits are all matched / all not-matched, then we emit an
6255-
OP_NCLASS/OP_CLASS respectively. */
6256-
6257-
else if (op_info.op_single_type == ECL_ANY ||
6258-
op_info.op_single_type == ECL_NONE)
6259-
{
6260-
PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));
6261-
6262-
if (lengthptr != NULL)
6263-
{
6264-
if (required_len > (*lengthptr - previous_length))
6265-
*lengthptr = previous_length + required_len;
6266-
}
6267-
6268-
/* Advancing code means rewinding lengthptr, at this point. */
6269-
if (lengthptr != NULL) *lengthptr -= required_len;
6270-
*code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
6271-
memcpy(code, op_info.bits.classbits, 32);
6272-
code += 32 / sizeof(PCRE2_UCHAR);
6273-
}
6274-
6275-
/* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
6276-
there, but, we pulled out its bitmap into op_info, so now we have to
6277-
put that back into the OP_XCLASS. */
6278-
6279-
else
6280-
{
6281-
#ifndef SUPPORT_WIDE_CHARS
6282-
PCRE2_DEBUG_UNREACHABLE();
6283-
#else
6284-
BOOL need_map;
6285-
PCRE2_SIZE required_len;
6286-
6287-
PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
6288-
need_map = !allbitszero;
6289-
required_len =
6290-
op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);
6291-
6292-
if (lengthptr != NULL)
6293-
{
6294-
/* Don't unconditionally request all the space we need - we may
6295-
already have asked for more during processing of the ECLASS. */
6296-
if (required_len > (*lengthptr - previous_length))
6297-
*lengthptr = previous_length + required_len;
6298-
6299-
/* The code we write out here won't be ignored, even during the
6300-
(lengthptr != NULL) phase, because if there's a following quantifier
6301-
it will peek backwards. So we do have to write out a (truncated)
6302-
OP_XCLASS, even on this branch. */
6303-
*lengthptr -= 1 + LINK_SIZE + 1;
6304-
*code++ = OP_XCLASS;
6305-
PUT(code, 0, 1 + LINK_SIZE + 1);
6306-
code += LINK_SIZE;
6307-
*code++ = 0;
6308-
}
6309-
else
6310-
{
6311-
PCRE2_UCHAR *rest;
6312-
PCRE2_SIZE rest_len;
6313-
PCRE2_UCHAR flags;
6314-
6315-
/* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
6316-
PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
6317-
rest = op_info.code_start + 1 + LINK_SIZE + 1;
6318-
rest_len = (op_info.code_start + op_info.length) - rest;
6319-
6320-
/* First read any data we use, before memmove splats it. */
6321-
flags = op_info.code_start[1 + LINK_SIZE];
6322-
PCRE2_ASSERT((flags & XCL_MAP) == 0);
6323-
6324-
/* Next do the memmove before any writes. */
6325-
memmove(
6326-
code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
6327-
rest, CU2BYTES(rest_len));
6328-
6329-
/* Finally write the header data. */
6330-
*code++ = OP_XCLASS;
6331-
PUT(code, 0, (int)required_len);
6332-
code += LINK_SIZE;
6333-
*code++ = flags | (need_map? XCL_MAP : 0);
6334-
if (need_map)
6335-
{
6336-
memcpy(code, op_info.bits.classbits, 32);
6337-
code += 32 / sizeof(PCRE2_UCHAR);
6338-
}
6339-
code += rest_len;
6340-
}
6341-
#endif /* SUPPORT_WIDE_CHARS */
6342-
}
6343-
}
6344-
6345-
/* Otherwise, we're going to keep the OP_ECLASS. However, again we need
6346-
to do some adjustment to insert the bitmap if we have one. */
6347-
6348-
#ifdef SUPPORT_WIDE_CHARS
6349-
else
6350-
{
6351-
BOOL need_map = !allbitszero;
6352-
PCRE2_SIZE required_len = 1 + LINK_SIZE + 1 +
6353-
(need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;
6354-
6355-
if (lengthptr != NULL)
6356-
{
6357-
if (required_len > (*lengthptr - previous_length))
6358-
*lengthptr = previous_length + required_len;
6359-
6360-
/* As for the XCLASS branch above, we do have to write out a dummy
6361-
OP_ECLASS, because of the backwards peek by the quantifier code. Write
6362-
out a (truncated) OP_ECLASS, even on this branch. */
6363-
*lengthptr -= 1 + LINK_SIZE + 1;
6364-
*code++ = OP_ECLASS;
6365-
PUT(code, 0, 1 + LINK_SIZE + 1);
6366-
code += LINK_SIZE;
6367-
*code++ = 0;
6368-
}
6369-
else
6370-
{
6371-
if (need_map)
6372-
{
6373-
PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
6374-
previous[1 + LINK_SIZE] |= ECL_MAP;
6375-
memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
6376-
CU2BYTES(code - map_start));
6377-
memcpy(map_start, op_info.bits.classbits, 32);
6378-
code += 32 / sizeof(PCRE2_UCHAR);
6379-
}
6380-
PUT(previous, 1, (int)(code - previous));
6381-
}
6382-
}
6383-
#endif /* SUPPORT_WIDE_CHARS */
6384-
63856196
goto CLASS_END_PROCESSING;
63866197
}
63876198

@@ -6506,7 +6317,7 @@ for (;; pptr++)
65066317
/* Now emit the OP_CLASS/OP_NCLASS/OP_XCLASS/OP_ALLANY opcode. */
65076318

65086319
pptr = PRIV(compile_class_not_nested)(options, xoptions, pptr + 1,
6509-
&code, meta == META_CLASS_NOT, FALSE,
6320+
&code, meta == META_CLASS_NOT, NULL,
65106321
errorcodeptr, cb, lengthptr);
65116322
if (pptr == NULL) return 0;
65126323
PCRE2_ASSERT(*pptr == META_CLASS_END);

src/pcre2_compile.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
263263
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
264264

265265
uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
266-
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL always_map,
266+
uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
267267
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
268268

269269
/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
@@ -272,8 +272,8 @@ start at a META_CLASS or META_CLASS_NOT.
272272
The pptr will be left pointing at the matching META_CLASS_END. */
273273

274274
BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
275-
BOOL negated, uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
276-
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
275+
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
276+
compile_block *cb, PCRE2_SIZE *lengthptr);
277277

278278
#endif /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
279279

0 commit comments

Comments
 (0)