@@ -214,10 +214,10 @@ static unsigned char meta_extra_lengths[] = {
214214 1 + SIZEOFFSET , /* META_COND_RNAME */
215215 1 + SIZEOFFSET , /* META_COND_RNUMBER */
216216 3 , /* META_COND_VERSION */
217- 1 + SIZEOFFSET , /* META_SCS_NAME */
218- 1 + SIZEOFFSET , /* META_SCS_NUMBER */
219- 1 + SIZEOFFSET , /* META_SCS_NEXT_NAME */
220- 1 + SIZEOFFSET , /* META_SCS_NEXT_NUMBER */
217+ SIZEOFFSET , /* META_OFFSET */
218+ 0 , /* META_SCS */
219+ 1 , /* META_SCS_NAME */
220+ 1 , /* META_SCS_NUMBER */
221221 0 , /* META_DOLLAR */
222222 0 , /* META_DOT */
223223 0 , /* META_ESCAPE - one more for ESC_P and ESC_p */
@@ -1024,30 +1024,22 @@ for (;;)
10241024 fprintf (stderr , "%zd" , offset );
10251025 break ;
10261026
1027- case META_SCS_NAME :
1028- fprintf (stderr , "META (*scan_substring:(<name>) length=%d offset=" , * pptr ++ );
1027+ case META_OFFSET :
1028+ fprintf (stderr , "META_OFFSET offset=" );
10291029 GETOFFSET (offset , pptr );
10301030 fprintf (stderr , "%zd" , offset );
10311031 break ;
10321032
1033- case META_SCS_NUMBER :
1034- fprintf (stderr , "META_SCS_NUMBER %d offset=" , pptr [SIZEOFFSET ]);
1035- GETOFFSET (offset , pptr );
1036- fprintf (stderr , "%zd" , offset );
1037- pptr ++ ;
1033+ case META_SCS :
1034+ fprintf (stderr , "META (*scan_substring:" );
10381035 break ;
10391036
1040- case META_SCS_NEXT_NAME :
1041- fprintf (stderr , "META_SCS_NEXT_NAME length=%d offset=" , * pptr ++ );
1042- GETOFFSET (offset , pptr );
1043- fprintf (stderr , "%zd" , offset );
1037+ case META_SCS_NAME :
1038+ fprintf (stderr , "META_SCS_NAME length=%d relative_offset=%d" , * pptr ++ , (int )meta_arg );
10441039 break ;
10451040
1046- case META_SCS_NEXT_NUMBER :
1047- fprintf (stderr , "META_SCS_NEXT_NUMBER %d offset=" , pptr [SIZEOFFSET ]);
1048- GETOFFSET (offset , pptr );
1049- fprintf (stderr , "%zd" , offset );
1050- pptr ++ ;
1041+ case META_SCS_NUMBER :
1042+ fprintf (stderr , "META_SCS_NUMBER %d relative_offset=%d" , * pptr ++ , (int )meta_arg );
10511043 break ;
10521044
10531045 case META_MARK :
@@ -4504,11 +4496,14 @@ while (ptr < ptrend)
45044496 }
45054497
45064498 ptr ++ ;
4499+ * parsed_pattern ++ = META_SCS ;
45074500 /* Temporary variable, zero in the first iteration. */
4508- meta = 0 ;
4501+ offset = 0 ;
45094502
45104503 for (;;)
45114504 {
4505+ PCRE2_SIZE next_offset = (PCRE2_SIZE )(ptr - cb -> start_pattern );
4506+
45124507 /* Handle (scan_substring:([+-]number)... */
45134508 if (read_number (& ptr , ptrend , cb -> bracount , MAX_GROUP_NUMBER , ERR61 ,
45144509 & i , & errorcode ))
@@ -4519,10 +4514,8 @@ while (ptr < ptrend)
45194514 errorcode = ERR15 ;
45204515 goto FAILED ;
45214516 }
4522- * parsed_pattern ++ = meta ? META_SCS_NEXT_NUMBER : META_SCS_NUMBER ;
4523- offset = (PCRE2_SIZE )(ptr - cb -> start_pattern - 2 );
4524- PUTOFFSET (offset , parsed_pattern );
4525- * parsed_pattern ++ = i ;
4517+ meta = META_SCS_NUMBER ;
4518+ namelen = (uint32_t )i ;
45264519 }
45274520 else if (errorcode != 0 ) goto FAILED ; /* Number too big */
45284521 else
@@ -4540,14 +4533,28 @@ while (ptr < ptrend)
45404533 goto FAILED ;
45414534 }
45424535
4543- if (!read_name (& ptr , ptrend , utf , terminator , & offset , & name ,
4544- & namelen , & errorcode , cb )) goto FAILED ;
4536+ if (!read_name (& ptr , ptrend , utf , terminator , & next_offset ,
4537+ & name , & namelen , & errorcode , cb )) goto FAILED ;
45454538
4546- * parsed_pattern ++ = meta ? META_SCS_NEXT_NAME : META_SCS_NAME ;
4547- * parsed_pattern ++ = namelen ;
4548- PUTOFFSET (offset , parsed_pattern );
4539+ meta = META_SCS_NAME ;
4540+ }
4541+
4542+ PCRE2_ASSERT (next_offset > 0 );
4543+ if (offset == 0 || (next_offset - offset ) >= 0x10000 )
4544+ {
4545+ * parsed_pattern ++ = META_OFFSET ;
4546+ PUTOFFSET (next_offset , parsed_pattern );
4547+ offset = next_offset ;
45494548 }
45504549
4550+ /* The offset is encoded as a relative offset, because for some
4551+ inputs such as ",2" in (*scs:(1,2,3)...), we only have space for
4552+ two uint32_t values, and an opcode and absolute offset may require
4553+ three uint32_t values. */
4554+ * parsed_pattern ++ = meta | (uint32_t )(next_offset - offset );
4555+ * parsed_pattern ++ = namelen ;
4556+ offset = next_offset ;
4557+
45514558 if (ptr >= ptrend ) goto UNCLOSED_PARENTHESIS ;
45524559
45534560 if (* ptr == CHAR_RIGHT_PARENTHESIS ) break ;
@@ -4559,7 +4566,6 @@ while (ptr < ptrend)
45594566 }
45604567
45614568 ptr ++ ;
4562- meta = 1 ;
45634569 }
45644570 ptr ++ ;
45654571 goto POST_ASSERTION ;
@@ -5807,6 +5813,8 @@ uint32_t meta, meta_arg;
58075813uint32_t firstcuflags , reqcuflags ;
58085814uint32_t zeroreqcuflags , zerofirstcuflags ;
58095815uint32_t req_caseopt , reqvary , tempreqvary ;
5816+ /* Some opcodes, such as META_SCS_NUMBER or META_SCS_NAME,
5817+ depend on the previous value of offset. */
58105818PCRE2_SIZE offset = 0 ;
58115819PCRE2_SIZE length_prevgroup = 0 ;
58125820PCRE2_UCHAR * code = * codeptr ;
@@ -6294,6 +6302,15 @@ for (;; pptr++)
62946302 req_caseopt = ((options & PCRE2_CASELESS ) != 0 )? REQ_CASELESS : 0 ;
62956303 break ;
62966304
6305+ case META_OFFSET :
6306+ GETPLUSOFFSET (offset , pptr );
6307+ break ;
6308+
6309+ case META_SCS :
6310+ bravalue = OP_ASSERT_SCS ;
6311+ cb -> assert_depth += 1 ;
6312+ goto GROUP_PROCESS ;
6313+
62976314
62986315 /* ===================================================================*/
62996316 /* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous
@@ -6305,17 +6322,19 @@ for (;; pptr++)
63056322 case META_COND_RNUMBER : /* (?(Rdigits) */
63066323 case META_COND_NAME : /* (?(name) or (?'name') or ?(<name>) */
63076324 case META_COND_RNAME : /* (?(R&name) - test for recursion */
6308- case META_SCS_NAME : /* (*scan_substring:'name') or (*scan_substring:(<name>)) */
6309- case META_SCS_NEXT_NAME : /* More names for scan substring. */
6310- bravalue = meta == META_SCS_NAME ? OP_ASSERT_SCS : OP_COND ;
6325+ case META_SCS_NAME : /* Name of scan substring */
6326+ bravalue = OP_COND ;
63116327 {
63126328 int count , index ;
63136329 unsigned int i ;
63146330 PCRE2_SPTR name ;
63156331 named_group * ng = cb -> named_groups ;
63166332 uint32_t length = * (++ pptr );
63176333
6318- GETPLUSOFFSET (offset , pptr );
6334+ if (meta == META_SCS_NAME )
6335+ offset += meta_arg ;
6336+ else
6337+ GETPLUSOFFSET (offset , pptr );
63196338 name = cb -> start_pattern + offset ;
63206339
63216340 /* In the first pass, the names generated in the pre-pass are available,
@@ -6371,7 +6390,7 @@ for (;; pptr++)
63716390 /* Otherwise found a duplicated name */
63726391 if (ng -> number > cb -> top_backref ) cb -> top_backref = ng -> number ;
63736392
6374- if (meta == META_SCS_NEXT_NAME )
6393+ if (meta == META_SCS_NAME )
63756394 {
63766395 code [0 ] = OP_CREF ;
63776396 PUT2 (code , 1 , ng -> number );
@@ -6395,7 +6414,7 @@ for (;; pptr++)
63956414 if (lengthptr == NULL && !find_dupname_details (name , length , & index ,
63966415 & count , errorcodeptr , cb )) return 0 ;
63976416
6398- if (meta == META_SCS_NEXT_NAME )
6417+ if (meta == META_SCS_NAME )
63996418 {
64006419 code [0 ] = OP_DNCREF ;
64016420 PUT2 (code , 1 , index );
@@ -6415,9 +6434,8 @@ for (;; pptr++)
64156434 PUT2 (code , 2 + LINK_SIZE + IMM2_SIZE , count );
64166435 }
64176436
6418- if (meta != META_SCS_NAME ) goto GROUP_PROCESS_NOTE_EMPTY ;
6419- cb -> assert_depth += 1 ;
6420- goto GROUP_PROCESS ;
6437+ PCRE2_ASSERT (meta != META_SCS_NAME );
6438+ goto GROUP_PROCESS_NOTE_EMPTY ;
64216439
64226440 /* The DEFINE condition is always false. Its internal groups may never
64236441 be called, so matched_char must remain false, hence the jump to
@@ -6434,9 +6452,12 @@ for (;; pptr++)
64346452
64356453 case META_COND_NUMBER :
64366454 case META_SCS_NUMBER :
6437- case META_SCS_NEXT_NUMBER :
6438- bravalue = meta == META_SCS_NUMBER ? OP_ASSERT_SCS : OP_COND ;
6439- GETPLUSOFFSET (offset , pptr );
6455+ bravalue = OP_COND ;
6456+ if (meta == META_SCS_NUMBER )
6457+ offset += meta_arg ;
6458+ else
6459+ GETPLUSOFFSET (offset , pptr );
6460+
64406461 groupnumber = * (++ pptr );
64416462 if (groupnumber > cb -> bracount )
64426463 {
@@ -6446,7 +6467,7 @@ for (;; pptr++)
64466467 }
64476468 if (groupnumber > cb -> top_backref ) cb -> top_backref = groupnumber ;
64486469
6449- if (meta == META_SCS_NEXT_NUMBER )
6470+ if (meta == META_SCS_NUMBER )
64506471 {
64516472 code [0 ] = OP_CREF ;
64526473 PUT2 (code , 1 , groupnumber );
@@ -6455,13 +6476,11 @@ for (;; pptr++)
64556476 }
64566477
64576478 /* Point at initial ( for too many branches error */
6458- if ( meta != META_SCS_NUMBER ) offset -= 2 ;
6479+ offset -= 2 ;
64596480 code [1 + LINK_SIZE ] = OP_CREF ;
64606481 skipunits = 1 + IMM2_SIZE ;
64616482 PUT2 (code , 2 + LINK_SIZE , groupnumber );
6462- if (meta != META_SCS_NUMBER ) goto GROUP_PROCESS_NOTE_EMPTY ;
6463- cb -> assert_depth += 1 ;
6464- goto GROUP_PROCESS ;
6483+ goto GROUP_PROCESS_NOTE_EMPTY ;
64656484
64666485 /* Test for the PCRE2 version. */
64676486
@@ -9089,6 +9108,7 @@ for (;; pptr++)
90899108 case META_COND_RNAME :
90909109 case META_COND_RNUMBER :
90919110 case META_COND_VERSION :
9111+ case META_SCS :
90929112 case META_LOOKAHEAD :
90939113 case META_LOOKAHEADNOT :
90949114 case META_LOOKAHEAD_NA :
@@ -9350,6 +9370,7 @@ for (;; pptr++)
93509370 case META_LOOKAHEAD :
93519371 case META_LOOKAHEADNOT :
93529372 case META_LOOKAHEAD_NA :
9373+ case META_SCS :
93539374 * errcodeptr = check_lookbehinds (pptr + 1 , & pptr , recurses , cb , lcptr );
93549375 if (* errcodeptr != 0 ) return -1 ;
93559376
@@ -9781,8 +9802,7 @@ for (; *pptr != META_END; pptr++)
97819802 case META_ATOMIC :
97829803 case META_CAPTURE :
97839804 case META_COND_ASSERT :
9784- case META_SCS_NAME :
9785- case META_SCS_NUMBER :
9805+ case META_SCS :
97869806 case META_LOOKAHEAD :
97879807 case META_LOOKAHEADNOT :
97889808 case META_LOOKAHEAD_NA :
@@ -9820,6 +9840,7 @@ for (; *pptr != META_END; pptr++)
98209840 case META_THEN :
98219841 break ;
98229842
9843+ case META_OFFSET :
98239844 case META_RECURSE :
98249845 pptr += SIZEOFFSET ;
98259846 break ;
@@ -9838,8 +9859,6 @@ for (; *pptr != META_END; pptr++)
98389859 case META_COND_NUMBER :
98399860 case META_COND_RNAME :
98409861 case META_COND_RNUMBER :
9841- case META_SCS_NEXT_NAME :
9842- case META_SCS_NEXT_NUMBER :
98439862 pptr += 1 + SIZEOFFSET ;
98449863 nestlevel ++ ;
98459864 break ;
@@ -9856,6 +9875,8 @@ for (; *pptr != META_END; pptr++)
98569875 case META_BIGVALUE :
98579876 case META_POSIX :
98589877 case META_POSIX_NEG :
9878+ case META_SCS_NAME :
9879+ case META_SCS_NUMBER :
98599880 pptr += 1 ;
98609881 break ;
98619882
0 commit comments