@@ -5670,11 +5670,17 @@ if (last)
56705670 chars -> last_count ++ ;
56715671}
56725672
5673- static int scan_prefix (compiler_common * common , PCRE2_SPTR cc , fast_forward_char_data * chars , int max_chars , sljit_u32 * rec_count )
5673+ #define SCAN_PREFIX_STACK_END 32
5674+
5675+ static int scan_prefix (compiler_common * common , PCRE2_SPTR cc , fast_forward_char_data * chars )
56745676{
5675- /* Recursive function, which scans prefix literals. */
5677+ fast_forward_char_data * chars_start = chars ;
5678+ fast_forward_char_data * chars_end = chars + MAX_N_CHARS ;
5679+ PCRE2_SPTR cc_stack [SCAN_PREFIX_STACK_END ];
5680+ fast_forward_char_data * chars_stack [SCAN_PREFIX_STACK_END ];
5681+ sljit_u8 next_alternative_stack [SCAN_PREFIX_STACK_END ];
56765682BOOL last , any , class , caseless ;
5677- int len , repeat , len_save , consumed = 0 ;
5683+ int stack_ptr , rec_count , repeat , len , len_save ;
56785684sljit_u32 chr ; /* Any unicode character. */
56795685sljit_u8 * bytes , * bytes_end , byte ;
56805686PCRE2_SPTR alternative , cc_save , oc ;
@@ -5687,11 +5693,44 @@ PCRE2_UCHAR othercase[1];
56875693#endif
56885694
56895695repeat = 1 ;
5696+ stack_ptr = 0 ;
5697+ rec_count = 10000 ;
56905698while (TRUE)
56915699 {
5692- if (* rec_count == 0 )
5700+ if (-- rec_count == 0 )
56935701 return 0 ;
5694- (* rec_count )-- ;
5702+
5703+ SLJIT_ASSERT (chars <= chars_start + MAX_N_CHARS );
5704+
5705+ if (chars >= chars_end )
5706+ {
5707+ if (stack_ptr == 0 )
5708+ return chars_end - chars_start ;
5709+
5710+ -- stack_ptr ;
5711+ cc = cc_stack [stack_ptr ];
5712+ chars = chars_stack [stack_ptr ];
5713+
5714+ if (chars >= chars_end )
5715+ continue ;
5716+
5717+ if (next_alternative_stack [stack_ptr ] != 0 )
5718+ {
5719+ /* When an alternative is processed, the
5720+ next alternative is pushed onto the stack. */
5721+ SLJIT_ASSERT (* cc == OP_ALT );
5722+ alternative = cc + GET (cc , 1 );
5723+ if (* alternative == OP_ALT )
5724+ {
5725+ SLJIT_ASSERT (stack_ptr < SCAN_PREFIX_STACK_END );
5726+ cc_stack [stack_ptr ] = alternative ;
5727+ chars_stack [stack_ptr ] = chars ;
5728+ next_alternative_stack [stack_ptr ] = 1 ;
5729+ stack_ptr ++ ;
5730+ }
5731+ cc += 1 + LINK_SIZE ;
5732+ }
5733+ }
56955734
56965735 last = TRUE;
56975736 any = FALSE;
@@ -5768,9 +5807,17 @@ while (TRUE)
57685807#ifdef SUPPORT_UNICODE
57695808 if (common -> utf && HAS_EXTRALEN (* cc )) len += GET_EXTRALEN (* cc );
57705809#endif
5771- max_chars = scan_prefix (common , cc + len , chars , max_chars , rec_count );
5772- if (max_chars == 0 )
5773- return consumed ;
5810+ if (stack_ptr >= SCAN_PREFIX_STACK_END )
5811+ {
5812+ chars_end = chars ;
5813+ continue ;
5814+ }
5815+
5816+ cc_stack [stack_ptr ] = cc + len ;
5817+ chars_stack [stack_ptr ] = chars ;
5818+ next_alternative_stack [stack_ptr ] = 0 ;
5819+ stack_ptr ++ ;
5820+
57745821 last = FALSE;
57755822 break ;
57765823
@@ -5788,12 +5835,18 @@ while (TRUE)
57885835 case OP_CBRA :
57895836 case OP_CBRAPOS :
57905837 alternative = cc + GET (cc , 1 );
5791- while (* alternative == OP_ALT )
5838+ if (* alternative == OP_ALT )
57925839 {
5793- max_chars = scan_prefix (common , alternative + 1 + LINK_SIZE , chars , max_chars , rec_count );
5794- if (max_chars == 0 )
5795- return consumed ;
5796- alternative += GET (alternative , 1 );
5840+ if (stack_ptr >= SCAN_PREFIX_STACK_END )
5841+ {
5842+ chars_end = chars ;
5843+ continue ;
5844+ }
5845+
5846+ cc_stack [stack_ptr ] = alternative ;
5847+ chars_stack [stack_ptr ] = chars ;
5848+ next_alternative_stack [stack_ptr ] = 1 ;
5849+ stack_ptr ++ ;
57975850 }
57985851
57995852 if (* cc == OP_CBRA || * cc == OP_CBRAPOS )
@@ -5804,22 +5857,33 @@ while (TRUE)
58045857 case OP_CLASS :
58055858#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
58065859 if (common -> utf && !is_char7_bitset ((const sljit_u8 * )(cc + 1 ), FALSE))
5807- return consumed ;
5860+ {
5861+ chars_end = chars ;
5862+ continue ;
5863+ }
58085864#endif
58095865 class = TRUE;
58105866 break ;
58115867
58125868 case OP_NCLASS :
58135869#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5814- if (common -> utf ) return consumed ;
5870+ if (common -> utf )
5871+ {
5872+ chars_end = chars ;
5873+ continue ;
5874+ }
58155875#endif
58165876 class = TRUE;
58175877 break ;
58185878
58195879#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
58205880 case OP_XCLASS :
58215881#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5822- if (common -> utf ) return consumed ;
5882+ if (common -> utf )
5883+ {
5884+ chars_end = chars ;
5885+ continue ;
5886+ }
58235887#endif
58245888 any = TRUE;
58255889 cc += GET (cc , 1 );
@@ -5829,7 +5893,10 @@ while (TRUE)
58295893 case OP_DIGIT :
58305894#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
58315895 if (common -> utf && !is_char7_bitset ((const sljit_u8 * )common -> ctypes - cbit_length + cbit_digit , FALSE))
5832- return consumed ;
5896+ {
5897+ chars_end = chars ;
5898+ continue ;
5899+ }
58335900#endif
58345901 any = TRUE;
58355902 cc ++ ;
@@ -5838,7 +5905,10 @@ while (TRUE)
58385905 case OP_WHITESPACE :
58395906#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
58405907 if (common -> utf && !is_char7_bitset ((const sljit_u8 * )common -> ctypes - cbit_length + cbit_space , FALSE))
5841- return consumed ;
5908+ {
5909+ chars_end = chars ;
5910+ continue ;
5911+ }
58425912#endif
58435913 any = TRUE;
58445914 cc ++ ;
@@ -5847,7 +5917,10 @@ while (TRUE)
58475917 case OP_WORDCHAR :
58485918#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
58495919 if (common -> utf && !is_char7_bitset ((const sljit_u8 * )common -> ctypes - cbit_length + cbit_word , FALSE))
5850- return consumed ;
5920+ {
5921+ chars_end = chars ;
5922+ continue ;
5923+ }
58515924#endif
58525925 any = TRUE;
58535926 cc ++ ;
@@ -5863,7 +5936,11 @@ while (TRUE)
58635936 case OP_ANY :
58645937 case OP_ALLANY :
58655938#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5866- if (common -> utf ) return consumed ;
5939+ if (common -> utf )
5940+ {
5941+ chars_end = chars ;
5942+ continue ;
5943+ }
58675944#endif
58685945 any = TRUE;
58695946 cc ++ ;
@@ -5873,7 +5950,11 @@ while (TRUE)
58735950 case OP_NOTPROP :
58745951 case OP_PROP :
58755952#if PCRE2_CODE_UNIT_WIDTH != 32
5876- if (common -> utf ) return consumed ;
5953+ if (common -> utf )
5954+ {
5955+ chars_end = chars ;
5956+ continue ;
5957+ }
58775958#endif
58785959 any = TRUE;
58795960 cc += 1 + 2 ;
@@ -5888,29 +5969,32 @@ while (TRUE)
58885969 case OP_NOTEXACT :
58895970 case OP_NOTEXACTI :
58905971#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5891- if (common -> utf ) return consumed ;
5972+ if (common -> utf )
5973+ {
5974+ chars_end = chars ;
5975+ continue ;
5976+ }
58925977#endif
58935978 any = TRUE;
58945979 repeat = GET2 (cc , 1 );
58955980 cc += 1 + IMM2_SIZE + 1 ;
58965981 break ;
58975982
58985983 default :
5899- return consumed ;
5984+ chars_end = chars ;
5985+ continue ;
59005986 }
59015987
5988+ SLJIT_ASSERT (chars < chars_end );
5989+
59025990 if (any )
59035991 {
59045992 do
59055993 {
59065994 chars -> count = 255 ;
5907-
5908- consumed ++ ;
5909- if (-- max_chars == 0 )
5910- return consumed ;
59115995 chars ++ ;
59125996 }
5913- while (-- repeat > 0 );
5997+ while (-- repeat > 0 && chars < chars_end );
59145998
59155999 repeat = 1 ;
59166000 continue ;
@@ -5929,9 +6013,16 @@ while (TRUE)
59296013 case OP_CRQUERY :
59306014 case OP_CRMINQUERY :
59316015 case OP_CRPOSQUERY :
5932- max_chars = scan_prefix (common , cc + 1 , chars , max_chars , rec_count );
5933- if (max_chars == 0 )
5934- return consumed ;
6016+ if (stack_ptr >= SCAN_PREFIX_STACK_END )
6017+ {
6018+ chars_end = chars ;
6019+ continue ;
6020+ }
6021+
6022+ cc_stack [stack_ptr ] = cc + 1 ;
6023+ chars_stack [stack_ptr ] = chars ;
6024+ next_alternative_stack [stack_ptr ] = 0 ;
6025+ stack_ptr ++ ;
59356026 break ;
59366027
59376028 default :
@@ -5945,7 +6036,10 @@ while (TRUE)
59456036 case OP_CRPOSRANGE :
59466037 repeat = GET2 (cc , 1 );
59476038 if (repeat <= 0 )
5948- return consumed ;
6039+ {
6040+ chars_end = chars ;
6041+ continue ;
6042+ }
59496043 break ;
59506044 }
59516045
@@ -5980,19 +6074,18 @@ while (TRUE)
59806074 bytes = bytes_end - 32 ;
59816075 }
59826076
5983- consumed ++ ;
5984- if (-- max_chars == 0 )
5985- return consumed ;
59866077 chars ++ ;
59876078 }
5988- while (-- repeat > 0 );
6079+ while (-- repeat > 0 && chars < chars_end );
59896080
6081+ repeat = 1 ;
59906082 switch (* cc )
59916083 {
59926084 case OP_CRSTAR :
59936085 case OP_CRMINSTAR :
59946086 case OP_CRPOSSTAR :
5995- return consumed ;
6087+ chars_end = chars ;
6088+ break ;
59966089
59976090 case OP_CRQUERY :
59986091 case OP_CRMINQUERY :
@@ -6004,12 +6097,11 @@ while (TRUE)
60046097 case OP_CRMINRANGE :
60056098 case OP_CRPOSRANGE :
60066099 if (GET2 (cc , 1 ) != GET2 (cc , 1 + IMM2_SIZE ))
6007- return consumed ;
6100+ chars_end = chars ;
60086101 cc += 1 + 2 * IMM2_SIZE ;
60096102 break ;
60106103 }
60116104
6012- repeat = 1 ;
60136105 continue ;
60146106 }
60156107
@@ -6025,7 +6117,10 @@ while (TRUE)
60256117 {
60266118 GETCHAR (chr , cc );
60276119 if ((int )PRIV (ord2utf )(char_othercase (common , chr ), othercase ) != len )
6028- return consumed ;
6120+ {
6121+ chars_end = chars ;
6122+ continue ;
6123+ }
60296124 }
60306125 else
60316126#endif
@@ -6056,23 +6151,20 @@ while (TRUE)
60566151 do
60576152 {
60586153 len -- ;
6059- consumed ++ ;
60606154
60616155 chr = * cc ;
60626156 add_prefix_char (* cc , chars , len == 0 );
60636157
60646158 if (caseless )
60656159 add_prefix_char (* oc , chars , len == 0 );
60666160
6067- if (-- max_chars == 0 )
6068- return consumed ;
60696161 chars ++ ;
60706162 cc ++ ;
60716163 oc ++ ;
60726164 }
6073- while (len > 0 );
6165+ while (len > 0 && chars < chars_end );
60746166
6075- if (-- repeat == 0 )
6167+ if (-- repeat == 0 || chars >= chars_end )
60766168 break ;
60776169
60786170 len = len_save ;
@@ -6081,7 +6173,7 @@ while (TRUE)
60816173
60826174 repeat = 1 ;
60836175 if (last )
6084- return consumed ;
6176+ chars_end = chars ;
60856177 }
60866178}
60876179
@@ -6251,16 +6343,14 @@ int i, max, from;
62516343int range_right = -1 , range_len ;
62526344sljit_u8 * update_table = NULL ;
62536345BOOL in_range ;
6254- sljit_u32 rec_count ;
62556346
62566347for (i = 0 ; i < MAX_N_CHARS ; i ++ )
62576348 {
62586349 chars [i ].count = 0 ;
62596350 chars [i ].last_count = 0 ;
62606351 }
62616352
6262- rec_count = 10000 ;
6263- max = scan_prefix (common , common -> start , chars , MAX_N_CHARS , & rec_count );
6353+ max = scan_prefix (common , common -> start , chars );
62646354
62656355if (max < 1 )
62666356 return FALSE;
0 commit comments