Skip to content

Commit 833ab6d

Browse files
authored
Improve xclass range list matching (#567)
1 parent: 793ce6b · commit: 833ab6d

File tree

1 file changed

62 additions (+62) and 46 deletions (-46) — lines changed

1 file changed

62 additions (+62) and 46 deletions (-46) — lines changed

src/pcre2_jit_compile.c

Lines changed: 62 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -8314,7 +8314,7 @@ sljit_u32 compares, invertcmp, depth;
83148314
sljit_u32 first_item, last_item, mid_item;
83158315
sljit_u32 range_start, range_end;
83168316
xclass_ranges ranges;
8317-
BOOL has_cmov;
8317+
BOOL has_cmov, last_range_set;
83188318

83198319
#ifdef SUPPORT_UNICODE
83208320
sljit_u32 unicode_status = 0;
@@ -8905,11 +8905,11 @@ if (ranges.range_count == 2)
89058905
return;
89068906
}
89078907

8908-
if (ranges.range_count > 6 && charoffset != 0)
8909-
{
8910-
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8911-
charoffset = 0;
8912-
}
8908+
range_start = ranges.ranges[0];
8909+
range_end = ranges.ranges[ranges.range_count - 1];
8910+
SET_CHAR_OFFSET(range_start);
8911+
add_jump(compiler, (ccbegin[-1] & XCL_NOT) == 0 ? backtracks : &found,
8912+
CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)));
89138913

89148914
depth = 0;
89158915
first_item = 0;
@@ -8919,16 +8919,25 @@ has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
89198919
while (TRUE)
89208920
{
89218921
/* At least two items are present. */
8922-
SLJIT_ASSERT(first_item < last_item);
8922+
SLJIT_ASSERT(first_item < last_item && charoffset == ranges.ranges[0]);
8923+
last_range_set = FALSE;
89238924

89248925
if (first_item + 6 <= last_item)
89258926
{
8926-
SLJIT_ASSERT(charoffset == 0);
89278927
mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
89288928
SLJIT_ASSERT(last_item >= mid_item + 4);
89298929

89308930
range_end = ranges.ranges[mid_item + 1];
8931-
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)range_end);
8931+
if (first_item + 6 > mid_item && ranges.ranges[mid_item] == range_end)
8932+
{
8933+
OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
8934+
ranges.stack[depth].jump = JUMP(SLJIT_GREATER);
8935+
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8936+
last_range_set = TRUE;
8937+
}
8938+
else
8939+
ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
8940+
89328941
ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2);
89338942
ranges.stack[depth].last_item = (sljit_u32)last_item;
89348943

@@ -8937,61 +8946,68 @@ while (TRUE)
89378946
depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);
89388947

89398948
last_item = mid_item;
8940-
continue;
8941-
}
8942-
8943-
range_start = ranges.ranges[first_item];
8944-
range_end = ranges.ranges[first_item + 1];
8949+
if (!last_range_set)
8950+
continue;
89458951

8946-
if (range_start < range_end)
8947-
{
8948-
SET_CHAR_OFFSET(range_start);
8949-
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
8950-
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8952+
last_item -= 2;
89518953
}
8952-
else
8954+
8955+
if (!last_range_set)
89538956
{
8954-
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)range_start);
8955-
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8957+
range_start = ranges.ranges[first_item];
8958+
range_end = ranges.ranges[first_item + 1];
8959+
8960+
if (range_start < range_end)
8961+
{
8962+
SET_CHAR_OFFSET(range_start);
8963+
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
8964+
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8965+
}
8966+
else
8967+
{
8968+
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
8969+
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8970+
}
8971+
first_item += 2;
89568972
}
89578973

8958-
if (first_item < last_item)
8974+
SLJIT_ASSERT(first_item <= last_item);
8975+
8976+
do
89598977
{
8960-
do
8961-
{
8962-
first_item += 2;
8963-
range_start = ranges.ranges[first_item];
8964-
range_end = ranges.ranges[first_item + 1];
8978+
range_start = ranges.ranges[first_item];
8979+
range_end = ranges.ranges[first_item + 1];
89658980

8966-
if (range_start < range_end)
8967-
{
8968-
SET_CHAR_OFFSET(range_start);
8969-
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
8981+
if (range_start < range_end)
8982+
{
8983+
SET_CHAR_OFFSET(range_start);
8984+
OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
89708985

8971-
if (has_cmov)
8972-
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
8973-
else
8974-
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
8975-
}
8986+
if (has_cmov)
8987+
SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
89768988
else
8977-
{
8978-
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
8989+
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
8990+
}
8991+
else
8992+
{
8993+
OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
89798994

8980-
if (has_cmov)
8981-
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
8982-
else
8983-
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8984-
}
8995+
if (has_cmov)
8996+
SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
8997+
else
8998+
OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
89858999
}
8986-
while (first_item < last_item);
9000+
9001+
first_item += 2;
89879002
}
9003+
while (first_item <= last_item);
89889004

89899005
if (depth == 0) break;
89909006

89919007
add_jump(compiler, &check_result, JUMP(SLJIT_JUMP));
89929008

89939009
/* The charoffset resets after the end of a branch is reached. */
8994-
charoffset = 0;
9010+
charoffset = ranges.ranges[0];
89959011
depth--;
89969012
first_item = ranges.stack[depth].first_item;
89979013
last_item = ranges.stack[depth].last_item;

0 commit comments

Comments
 (0)