@@ -6691,6 +6691,15 @@ else
6691
6691
JUMPTO (SLJIT_JUMP , mainloop );
6692
6692
}
6693
6693
6694
#ifdef SUPPORT_UNICODE
/* Bit masks over Unicode character type (ucp_*) values. The generated code
first turns a character type into the one-hot value (1 << type) and then tests
it against one of these masks with a single AND, so bit N of a mask stands for
the type whose numeric value is N.

The constant is unsigned on purpose: shifting a signed 1 into or past the sign
bit is undefined behaviour, whereas every bit position in 0..31 is well defined
for an unsigned operand (this assumes unsigned int has at least 32 bits). */

/* Mask holding the bit of a single type. */
#define UCPCAT(bit) (1u << (bit))

/* Mask holding the bits of two types. */
#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))

/* Mask holding the bits of three types. */
#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))

/* Mask holding every bit in the inclusive range start..end, where
0 <= start <= end <= 31. Doubling the top bit and subtracting the bottom bit
leaves exactly the bits start..end set. When end is 31 the doubled value wraps
to zero, which is defined for unsigned arithmetic, and the subtraction still
produces the correct mask. Each argument is evaluated once. */
#define UCPCAT_RANGE(start, end) ((UCPCAT(end) << 1) - UCPCAT(start))

/* All types from ucp_Ll to ucp_Lu; relies on those enum values delimiting a
contiguous run. Used by the alphanumeric and word tests. */
#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)

/* All types from ucp_Nd to ucp_No; relies on those enum values delimiting a
contiguous run. Used by the alphanumeric and word tests. */
#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
#endif
6702
+
6694
6703
static void check_wordboundary (compiler_common * common , BOOL ucp )
6695
6704
{
6696
6705
DEFINE_COMPILER ;
@@ -6748,17 +6757,9 @@ else
6748
6757
if (ucp )
6749
6758
{
6750
6759
add_jump (compiler , & common -> getucdtype , JUMP (SLJIT_FAST_CALL ));
6751
- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Mn );
6752
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
6753
- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Pc );
6754
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
6755
- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Ll );
6756
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_Lu - ucp_Ll );
6757
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6758
- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Nd - ucp_Ll );
6759
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_No - ucp_Nd );
6760
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6761
- OP1 (SLJIT_MOV , TMP3 , 0 , TMP2 , 0 );
6760
+ OP2 (SLJIT_SHL , TMP2 , 0 , SLJIT_IMM , 1 , TMP1 , 0 );
6761
+ OP2U (SLJIT_AND | SLJIT_SET_Z , TMP2 , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
6762
+ OP_FLAGS (SLJIT_MOV , TMP3 , 0 , SLJIT_NOT_ZERO );
6762
6763
}
6763
6764
else
6764
6765
#endif /* SUPPORT_UNICODE */
@@ -6795,16 +6796,9 @@ valid_utf = LABEL();
6795
6796
if (ucp )
6796
6797
{
6797
6798
add_jump (compiler , & common -> getucdtype , JUMP (SLJIT_FAST_CALL ));
6798
- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Mn );
6799
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
6800
- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , ucp_Pc );
6801
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
6802
- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Ll );
6803
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_Lu - ucp_Ll );
6804
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6805
- OP2 (SLJIT_SUB , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , ucp_Nd - ucp_Ll );
6806
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , ucp_No - ucp_Nd );
6807
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
6799
+ OP2 (SLJIT_SHL , TMP2 , 0 , SLJIT_IMM , 1 , TMP1 , 0 );
6800
+ OP2U (SLJIT_AND | SLJIT_SET_Z , TMP2 , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
6801
+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
6808
6802
}
6809
6803
else
6810
6804
#endif /* SUPPORT_UNICODE */
@@ -7543,16 +7537,6 @@ return cc;
7543
7537
7544
7538
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7545
7539
7546
- #define SET_TYPE_OFFSET (value ) \
7547
- if ((value) != typeoffset) \
7548
- { \
7549
- if ((value) < typeoffset) \
7550
- OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
7551
- else \
7552
- OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
7553
- } \
7554
- typeoffset = (value);
7555
-
7556
7540
#define SET_CHAR_OFFSET (value ) \
7557
7541
if ((value) != charoffset) \
7558
7542
{ \
@@ -7577,7 +7561,6 @@ static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHA
7577
7561
#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
7578
7562
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
7579
7563
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
7580
-
7581
7564
#endif /* SUPPORT_UNICODE */
7582
7565
7583
7566
static void compile_xclass_matchingpath (compiler_common * common , PCRE2_SPTR cc , jump_list * * backtracks )
@@ -7597,7 +7580,6 @@ BOOL utf = common->utf;
7597
7580
sljit_u32 unicode_status = 0 ;
7598
7581
int typereg = TMP1 ;
7599
7582
const sljit_u32 * other_cases ;
7600
- sljit_uw typeoffset ;
7601
7583
#endif /* SUPPORT_UNICODE */
7602
7584
7603
7585
/* Scanning the necessary info. */
@@ -7672,6 +7654,7 @@ while (*cc != XCL_END)
7672
7654
case PT_LAMP :
7673
7655
case PT_GC :
7674
7656
case PT_PC :
7657
+ case PT_WORD :
7675
7658
case PT_ALNUM :
7676
7659
unicode_status |= XCLASS_HAS_TYPE ;
7677
7660
break ;
@@ -7692,7 +7675,6 @@ while (*cc != XCL_END)
7692
7675
7693
7676
case PT_SPACE :
7694
7677
case PT_PXSPACE :
7695
- case PT_WORD :
7696
7678
case PT_PXGRAPH :
7697
7679
case PT_PXPRINT :
7698
7680
case PT_PXPUNCT :
@@ -8027,16 +8009,14 @@ if (unicode_status & XCLASS_NEEDS_UCD)
8027
8009
typereg = RETURN_ADDR ;
8028
8010
8029
8011
OP1 (SLJIT_MOV_U8 , typereg , 0 , SLJIT_MEM1 (TMP2 ), (sljit_sw )PRIV (ucd_records ) + SLJIT_OFFSETOF (ucd_record , chartype ));
8012
+ OP2 (SLJIT_SHL , typereg , 0 , SLJIT_IMM , 1 , typereg , 0 );
8030
8013
}
8031
8014
}
8032
8015
#endif /* SUPPORT_UNICODE */
8033
8016
8034
8017
/* Generating code. */
8035
8018
charoffset = 0 ;
8036
8019
numberofcmps = 0 ;
8037
- #ifdef SUPPORT_UNICODE
8038
- typeoffset = 0 ;
8039
- #endif /* SUPPORT_UNICODE */
8040
8020
8041
8021
while (* cc != XCL_END )
8042
8022
{
@@ -8109,23 +8089,18 @@ while (*cc != XCL_END)
8109
8089
break ;
8110
8090
8111
8091
case PT_LAMP :
8112
- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Lu - typeoffset );
8113
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
8114
- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Ll - typeoffset );
8115
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
8116
- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Lt - typeoffset );
8117
- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_EQUAL );
8092
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT3 (ucp_Lu , ucp_Ll , ucp_Lt ));
8118
8093
jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8119
8094
break ;
8120
8095
8121
8096
case PT_GC :
8122
- c = PRIV (ucp_typerange )[(int )cc [1 ] * 2 ];
8123
- SET_TYPE_OFFSET (c );
8124
- jump = CMP (SLJIT_LESS_EQUAL ^ invertcmp , typereg , 0 , SLJIT_IMM , PRIV (ucp_typerange )[(int )cc [1 ] * 2 + 1 ] - c );
8097
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (PRIV (ucp_typerange )[(int )cc [1 ] * 2 ], PRIV (ucp_typerange )[(int )cc [1 ] * 2 + 1 ]));
8098
+ jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8125
8099
break ;
8126
8100
8127
8101
case PT_PC :
8128
- jump = CMP (SLJIT_EQUAL ^ invertcmp , typereg , 0 , SLJIT_IMM , (int )cc [1 ] - typeoffset );
8102
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (cc [1 ]));
8103
+ jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8129
8104
break ;
8130
8105
8131
8106
case PT_SC :
@@ -8148,26 +8123,18 @@ while (*cc != XCL_END)
8148
8123
OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , 0x180e - 0x9 );
8149
8124
OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
8150
8125
8151
- SET_TYPE_OFFSET (ucp_Zl );
8152
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Zs - ucp_Zl );
8153
- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_LESS_EQUAL );
8126
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Zl , ucp_Zs ));
8127
+ OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_NOT_ZERO );
8154
8128
jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8155
8129
break ;
8156
8130
8157
8131
case PT_WORD :
8158
- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Mn - typeoffset );
8159
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_EQUAL );
8160
- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Pc - typeoffset );
8161
- OP_FLAGS (SLJIT_OR , TMP2 , 0 , SLJIT_EQUAL );
8162
- /* Fall through. */
8132
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT2 (ucp_Mn , ucp_Pc ) | UCPCAT_L | UCPCAT_N );
8133
+ jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8134
+ break ;
8163
8135
8164
8136
case PT_ALNUM :
8165
- SET_TYPE_OFFSET (ucp_Ll );
8166
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Lu - ucp_Ll );
8167
- OP_FLAGS ((* cc == PT_ALNUM ) ? SLJIT_MOV : SLJIT_OR , TMP2 , 0 , SLJIT_LESS_EQUAL );
8168
- SET_TYPE_OFFSET (ucp_Nd );
8169
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_No - ucp_Nd );
8170
- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_LESS_EQUAL );
8137
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_L | UCPCAT_N );
8171
8138
jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8172
8139
break ;
8173
8140
@@ -8242,12 +8209,11 @@ while (*cc != XCL_END)
8242
8209
break ;
8243
8210
8244
8211
case PT_PXGRAPH :
8245
- /* C and Z groups are the farthest two groups. */
8246
- SET_TYPE_OFFSET (ucp_Ll );
8247
- OP2U (SLJIT_SUB | SLJIT_SET_GREATER , typereg , 0 , SLJIT_IMM , ucp_So - ucp_Ll );
8248
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_GREATER );
8212
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Cc , ucp_Cs ) | UCPCAT_RANGE (ucp_Zl , ucp_Zs ));
8213
+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
8249
8214
8250
- jump = CMP (SLJIT_NOT_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Cf - ucp_Ll );
8215
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (ucp_Cf ));
8216
+ jump = JUMP (SLJIT_ZERO );
8251
8217
8252
8218
/* In case of ucp_Cf, we overwrite the result. */
8253
8219
SET_CHAR_OFFSET (0x2066 );
@@ -8265,15 +8231,11 @@ while (*cc != XCL_END)
8265
8231
break ;
8266
8232
8267
8233
case PT_PXPRINT :
8268
- /* C and Z groups are the farthest two groups. */
8269
- SET_TYPE_OFFSET (ucp_Ll );
8270
- OP2U (SLJIT_SUB | SLJIT_SET_GREATER , typereg , 0 , SLJIT_IMM , ucp_So - ucp_Ll );
8271
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_GREATER );
8234
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Cc , ucp_Cs ) | UCPCAT2 (ucp_Zl , ucp_Zp ));
8235
+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
8272
8236
8273
- OP2U (SLJIT_SUB | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , ucp_Zs - ucp_Ll );
8274
- OP_FLAGS (SLJIT_AND , TMP2 , 0 , SLJIT_NOT_EQUAL );
8275
-
8276
- jump = CMP (SLJIT_NOT_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Cf - ucp_Ll );
8237
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT (ucp_Cf ));
8238
+ jump = JUMP (SLJIT_ZERO );
8277
8239
8278
8240
/* In case of ucp_Cf, we overwrite the result. */
8279
8241
SET_CHAR_OFFSET (0x2066 );
@@ -8288,17 +8250,15 @@ while (*cc != XCL_END)
8288
8250
break ;
8289
8251
8290
8252
case PT_PXPUNCT :
8291
- SET_TYPE_OFFSET (ucp_Sc );
8292
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_So - ucp_Sc );
8293
- OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_LESS_EQUAL );
8253
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Sc , ucp_So ));
8254
+ OP_FLAGS (SLJIT_MOV , TMP2 , 0 , SLJIT_NOT_ZERO );
8294
8255
8295
8256
SET_CHAR_OFFSET (0 );
8296
8257
OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , TMP1 , 0 , SLJIT_IMM , 0x7f );
8297
8258
OP_FLAGS (SLJIT_AND , TMP2 , 0 , SLJIT_LESS_EQUAL );
8298
8259
8299
- SET_TYPE_OFFSET (ucp_Pc );
8300
- OP2U (SLJIT_SUB | SLJIT_SET_LESS_EQUAL , typereg , 0 , SLJIT_IMM , ucp_Ps - ucp_Pc );
8301
- OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_LESS_EQUAL );
8260
+ OP2U (SLJIT_AND | SLJIT_SET_Z , typereg , 0 , SLJIT_IMM , UCPCAT_RANGE (ucp_Pc , ucp_Ps ));
8261
+ OP_FLAGS (SLJIT_OR | SLJIT_SET_Z , TMP2 , 0 , SLJIT_NOT_ZERO );
8302
8262
jump = JUMP (SLJIT_NOT_ZERO ^ invertcmp );
8303
8263
break ;
8304
8264
0 commit comments