@@ -8104,6 +8104,187 @@ SLJIT_ASSERT(next_char <= (const uint8_t*)common->start);
81048104ranges -> range_count = range_count ;
81058105}
81068106
8107+ #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 )
8108+
8109+ static void xclass_update_min_max (compiler_common * common , PCRE2_SPTR cc , sljit_u32 * min_ptr , sljit_u32 * max_ptr )
8110+ {
8111+ uint32_t type , list_ind , c ;
8112+ sljit_u32 min = * min_ptr ;
8113+ sljit_u32 max = * max_ptr ;
8114+ uint32_t char_list_add ;
8115+ const uint8_t * next_char ;
8116+ BOOL utf = TRUE;
8117+
8118+ /* This function is pointless without utf 8/16. */
8119+ SLJIT_ASSERT (common -> utf );
8120+ if (* cc == XCL_SINGLE || * cc == XCL_RANGE )
8121+ {
8122+ /* Only a few ranges are present. */
8123+ do
8124+ {
8125+ type = * cc ++ ;
8126+ SLJIT_ASSERT (type == XCL_SINGLE || type == XCL_RANGE );
8127+ GETCHARINCTEST (c , cc );
8128+
8129+ if (c < min )
8130+ min = c ;
8131+
8132+ if (type == XCL_RANGE )
8133+ {
8134+ GETCHARINCTEST (c , cc );
8135+ }
8136+
8137+ if (c > max )
8138+ max = c ;
8139+ }
8140+ while (* cc != XCL_END );
8141+
8142+ SLJIT_ASSERT (min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max );
8143+ * min_ptr = min ;
8144+ * max_ptr = max ;
8145+ return ;
8146+ }
8147+
8148+ SLJIT_ASSERT (cc [0 ] >= XCL_LIST );
8149+ #if PCRE2_CODE_UNIT_WIDTH == 8
8150+ type = (uint32_t )(cc [0 ] << 8 ) | cc [1 ];
8151+ cc += 2 ;
8152+ #else
8153+ type = cc [0 ];
8154+ cc ++ ;
8155+ #endif /* CODE_UNIT_WIDTH */
8156+
8157+ /* Align characters. */
8158+ next_char = (const uint8_t * )common -> start - (GET (cc , 0 ) << 1 );
8159+ type &= XCL_TYPE_MASK ;
8160+
8161+ SLJIT_ASSERT (type != 0 );
8162+
8163+ /* Detect minimum. */
8164+
8165+ /* Skip unused ranges. */
8166+ list_ind = 0 ;
8167+ while ((type & (XCL_BEGIN_WITH_RANGE | XCL_ITEM_COUNT_MASK )) == 0 )
8168+ {
8169+ type >>= XCL_TYPE_BIT_LEN ;
8170+ list_ind ++ ;
8171+ }
8172+
8173+ SLJIT_ASSERT (list_ind <= 2 );
8174+ switch (list_ind )
8175+ {
8176+ case 0 :
8177+ char_list_add = XCL_CHAR_LIST_LOW_16_ADD ;
8178+ c = XCL_CHAR_LIST_LOW_16_START ;
8179+ break ;
8180+
8181+ case 1 :
8182+ char_list_add = XCL_CHAR_LIST_HIGH_16_ADD ;
8183+ c = XCL_CHAR_LIST_HIGH_16_START ;
8184+ break ;
8185+
8186+ default :
8187+ char_list_add = XCL_CHAR_LIST_LOW_32_ADD ;
8188+ c = XCL_CHAR_LIST_LOW_32_START ;
8189+ break ;
8190+ }
8191+
8192+ if ((type & XCL_BEGIN_WITH_RANGE ) != 0 )
8193+ {
8194+ if (c < min )
8195+ min = c ;
8196+ }
8197+ else
8198+ {
8199+ if ((type & XCL_ITEM_COUNT_MASK ) == XCL_ITEM_COUNT_MASK )
8200+ {
8201+ if (list_ind <= 1 )
8202+ c = * (const uint16_t * )(next_char + 2 );
8203+ else
8204+ c = * (const uint32_t * )(next_char + 4 );
8205+ }
8206+ else
8207+ {
8208+ if (list_ind <= 1 )
8209+ c = * (const uint16_t * )next_char ;
8210+ else
8211+ c = * (const uint32_t * )next_char ;
8212+ }
8213+
8214+ c = char_list_add + (c >> XCL_CHAR_SHIFT );
8215+ if (c < min )
8216+ min = c ;
8217+ }
8218+
8219+ /* Detect maximum. */
8220+
8221+ /* Skip intermediate ranges. */
8222+ while (TRUE)
8223+ {
8224+ if ((type & XCL_ITEM_COUNT_MASK ) == XCL_ITEM_COUNT_MASK )
8225+ {
8226+ if (list_ind <= 1 )
8227+ {
8228+ c = * (const uint16_t * )next_char ;
8229+ next_char += (c + 1 ) << 1 ;
8230+ }
8231+ else
8232+ {
8233+ c = * (const uint32_t * )next_char ;
8234+ next_char += (c + 1 ) << 2 ;
8235+ }
8236+ }
8237+ else
8238+ next_char += (type & XCL_ITEM_COUNT_MASK ) << (list_ind <= 1 ? 1 : 2 );
8239+
8240+ if ((type >> XCL_TYPE_BIT_LEN ) == 0 )
8241+ break ;
8242+
8243+ list_ind ++ ;
8244+ type >>= XCL_TYPE_BIT_LEN ;
8245+ }
8246+
8247+ SLJIT_ASSERT (list_ind <= 2 && type != 0 );
8248+ switch (list_ind )
8249+ {
8250+ case 0 :
8251+ char_list_add = XCL_CHAR_LIST_LOW_16_ADD ;
8252+ c = XCL_CHAR_LIST_LOW_16_END ;
8253+ break ;
8254+
8255+ case 1 :
8256+ char_list_add = XCL_CHAR_LIST_HIGH_16_ADD ;
8257+ c = XCL_CHAR_LIST_HIGH_16_END ;
8258+ break ;
8259+
8260+ default :
8261+ char_list_add = XCL_CHAR_LIST_LOW_32_ADD ;
8262+ c = XCL_CHAR_LIST_LOW_32_END ;
8263+ break ;
8264+ }
8265+
8266+ if ((type & XCL_ITEM_COUNT_MASK ) != 0 )
8267+ {
8268+ /* Type is reused as temporary. */
8269+ if (list_ind <= 1 )
8270+ type = * (const uint16_t * )(next_char - 2 );
8271+ else
8272+ type = * (const uint32_t * )(next_char - 4 );
8273+
8274+ if (type & XCL_CHAR_END )
8275+ c = char_list_add + (type >> XCL_CHAR_SHIFT );
8276+ }
8277+
8278+ if (c > max )
8279+ max = c ;
8280+
8281+ SLJIT_ASSERT (min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max );
8282+ * min_ptr = min ;
8283+ * max_ptr = max ;
8284+ }
8285+
8286+ #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
8287+
81078288#ifdef SUPPORT_UNICODE
81088289#define XCLASS_SAVE_CHAR 0x001
81098290#define XCLASS_CHAR_SAVED 0x002
@@ -8126,7 +8307,7 @@ jump_list *found = NULL;
81268307jump_list * check_result = NULL ;
81278308jump_list * * list = (cc [0 ] & XCL_NOT ) == 0 ? & found : backtracks ;
81288309sljit_uw c , charoffset ;
8129- sljit_u32 max = 256 , min = READ_CHAR_MAX ;
8310+ sljit_u32 max = READ_CHAR_MAX , min = 0 ;
81308311struct sljit_jump * jump = NULL ;
81318312PCRE2_SPTR ccbegin ;
81328313sljit_u32 compares , invertcmp , depth ;
@@ -8149,18 +8330,13 @@ ccbegin = cc;
81498330compares = 0 ;
81508331
81518332if (cc [-1 ] & XCL_MAP )
8152- {
8153- min = 0 ;
81548333 cc += 32 / sizeof (PCRE2_UCHAR );
8155- }
81568334
81578335#ifdef SUPPORT_UNICODE
81588336while (* cc == XCL_PROP || * cc == XCL_NOTPROP )
81598337 {
81608338 compares ++ ;
81618339 cc ++ ;
8162- max = READ_CHAR_MAX ;
8163- min = 0 ;
81648340
81658341 items = 0 ;
81668342
@@ -8256,22 +8432,16 @@ if (category_list == UCPCAT_ALL)
82568432 }
82578433#endif
82588434
8259- ranges .range_count = 0 ;
8260- ranges .ranges = ranges .local_ranges ;
8261- ranges .stack = ranges .local_stack ;
8262-
82638435if (* cc != XCL_END )
82648436 {
8265- xclass_compute_ranges (common , cc , & ranges );
8266-
8267- if (ranges .stack == NULL )
8268- return ;
8269-
8270- if (ranges .ranges [ranges .range_count - 1 ] > max )
8271- max = ranges .ranges [ranges .range_count - 1 ];
8272- if (ranges .ranges [0 ] < min )
8273- min = ranges .ranges [0 ];
8274-
8437+ #if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 )
8438+ if (common -> utf && compares == 0 )
8439+ {
8440+ max = 0 ;
8441+ min = (ccbegin [-1 ] & XCL_MAP ) != 0 ? 0 : READ_CHAR_MAX ;
8442+ xclass_update_min_max (common , cc , & min , & max );
8443+ }
8444+ #endif
82758445 compares ++ ;
82768446#ifdef SUPPORT_UNICODE
82778447 unicode_status |= XCLASS_SAVE_CHAR ;
@@ -8282,8 +8452,6 @@ if (*cc != XCL_END)
82828452if (compares == 0 && category_list == 0 )
82838453 {
82848454 /* No characters are accepted, same as (*F) or dotall. */
8285- SLJIT_ASSERT (ranges .stack == ranges .local_stack );
8286-
82878455 compile_char1_matchingpath (common , OP_ALLANY , cc , backtracks , FALSE);
82888456 if (list != backtracks )
82898457 add_jump (compiler , backtracks , JUMP (SLJIT_JUMP ));
@@ -8324,11 +8492,6 @@ if ((cc[-1] & XCL_MAP) != 0)
83248492
83258493 cc += 32 / sizeof (PCRE2_UCHAR );
83268494 }
8327- else
8328- {
8329- OP2 (SLJIT_SUB , TMP2 , 0 , TMP1 , 0 , SLJIT_IMM , min );
8330- add_jump (compiler , (cc [-1 ] & XCL_NOT ) == 0 ? backtracks : & found , CMP (SLJIT_GREATER , TMP2 , 0 , SLJIT_IMM , max - min ));
8331- }
83328495
83338496#ifdef SUPPORT_UNICODE
83348497if (unicode_status & XCLASS_NEEDS_UCD )
@@ -8690,19 +8853,35 @@ while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
86908853 add_jump (compiler , compares > 0 ? list : backtracks , jump );
86918854 }
86928855
8693- if (ranges . range_count == 0 )
8856+ if (compares == 0 )
86948857 {
8695- SLJIT_ASSERT (compares == 0 && ranges .stack == ranges .local_stack );
8696-
86978858 if (found != NULL )
86988859 set_jumps (found , LABEL ());
86998860 return ;
87008861 }
8701- #else
8702- SLJIT_ASSERT (ranges .range_count > 0 );
87038862#endif /* SUPPORT_UNICODE */
87048863
87058864SLJIT_ASSERT (compares == 1 );
8865+ ranges .range_count = 0 ;
8866+ ranges .ranges = ranges .local_ranges ;
8867+ ranges .stack = ranges .local_stack ;
8868+
8869+ xclass_compute_ranges (common , cc , & ranges );
8870+
8871+ if (ranges .stack == NULL )
8872+ return ;
8873+
8874+ #if (defined SLJIT_DEBUG && SLJIT_DEBUG ) && \
8875+ defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 )
8876+ if (common -> utf )
8877+ {
8878+ min = 0xffffffff ;
8879+ max = 0 ;
8880+ xclass_update_min_max (common , cc , & min , & max );
8881+ SLJIT_ASSERT (ranges .ranges [0 ] == min && ranges .ranges [ranges .range_count - 1 ] == max );
8882+ }
8883+ #endif /* SLJIT_DEBUG && SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
8884+
87068885invertcmp = (list != backtracks );
87078886
87088887if (ranges .range_count == 2 )
0 commit comments