@@ -838,17 +838,15 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
838
838
assert_accept_frame = F ;
839
839
RRETURN (MATCH_ACCEPT );
840
840
841
- /* If recursing, we have to find the most recent recursion. */
841
+ /* For ACCEPT within a recursion, we have to find the most recent
842
+ recursion. If not in a recursion, fall through to code that is common with
843
+ OP_END. */
842
844
843
845
case OP_ACCEPT :
844
- case OP_END :
845
-
846
- /* Handle end of a recursion. */
847
-
848
846
if (Fcurrent_recurse != RECURSE_UNSET )
849
847
{
850
848
#ifdef DEBUG_SHOW_OPS
851
- fprintf (stderr , "++ End within recursion\n" );
849
+ fprintf (stderr , "++ Accept within recursion\n" );
852
850
#endif
853
851
offset = Flast_group_offset ;
854
852
for (;;)
@@ -857,7 +855,6 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
857
855
N = (heapframe * )((char * )match_data -> heapframes + offset );
858
856
P = (heapframe * )((char * )N - frame_size );
859
857
if (GF_IDMASK (N -> group_frame_type ) == GF_RECURSE ) break ;
860
-
861
858
offset = P -> last_group_offset ;
862
859
}
863
860
@@ -873,11 +870,17 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
873
870
Fecode += 1 + LINK_SIZE ;
874
871
continue ;
875
872
}
873
+ /* Fall through */
876
874
877
- /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
878
- is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
879
- start of the subject. In both cases, backtracking will then try other
880
- alternatives, if any. */
875
+ /* OP_END itself can never be reached within a recursion because that is
876
+ picked up when the OP_KET that always precedes OP_END is reached. */
877
+
878
+ case OP_END :
879
+
880
+ /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
881
+ PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
882
+ subject. In both cases, backtracking will then try other alternatives, if
883
+ any. */
881
884
882
885
if (Feptr == Fstart_match &&
883
886
((mb -> moptions & PCRE2_NOTEMPTY ) != 0 ||
@@ -5856,7 +5859,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
5856
5859
/* ===================================================================== */
5857
5860
/* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
5858
5861
starting frame was added to the chained frames in order to remember the
5859
- starting subject position for the group. */
5862
+ starting subject position for the group. (Not true for OP_BRA when it's a
5863
+ whole-pattern recursion, but that is handled separately below.) */
5860
5864
5861
5865
case OP_KET :
5862
5866
case OP_KETRMIN :
@@ -5908,8 +5912,37 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
5908
5912
5909
5913
switch (* bracode )
5910
5914
{
5911
- case OP_BRA : /* No need to do anything for these */
5912
- case OP_COND :
5915
+ /* Whole-pattern recursion is handled as a recursion into group 0, but
5916
+ the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
5917
+ group - a design mistake: it should perhaps have been capture group 0.
5918
+ Anyway, that means the end of such recursion must be handled here. It is
5919
+ detected by checking for an immediately following OP_END when we are
5920
+ recursing in group 0. If this is not the end of a whole-pattern
5921
+ recursion, there is nothing to be done. */
5922
+
5923
+ case OP_BRA :
5924
+ if (Fcurrent_recurse != 0 || Fecode [1 + LINK_SIZE ] != OP_END ) break ;
5925
+
5926
+ /* It is the end of whole-pattern recursion. */
5927
+
5928
+ offset = Flast_group_offset ;
5929
+ if (offset == PCRE2_UNSET ) return PCRE2_ERROR_INTERNAL ;
5930
+ N = (heapframe * )((char * )match_data -> heapframes + offset );
5931
+ P = (heapframe * )((char * )N - frame_size );
5932
+ Flast_group_offset = P -> last_group_offset ;
5933
+
5934
+ /* Reinstate the previous set of captures and then carry on after the
5935
+ recursion call. */
5936
+
5937
+ memcpy ((char * )F + offsetof(heapframe , ovector ), P -> ovector ,
5938
+ Foffset_top * sizeof (PCRE2_SIZE ));
5939
+ Foffset_top = P -> offset_top ;
5940
+ Fcapture_last = P -> capture_last ;
5941
+ Fcurrent_recurse = P -> current_recurse ;
5942
+ Fecode = P -> ecode + 1 + LINK_SIZE ;
5943
+ continue ; /* With next opcode */
5944
+
5945
+ case OP_COND : /* No need to do anything for these */
5913
5946
case OP_SCOND :
5914
5947
break ;
5915
5948
@@ -5976,9 +6009,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
5976
6009
if (!PRIV (script_run )(P -> eptr , Feptr , utf )) RRETURN (MATCH_NOMATCH );
5977
6010
break ;
5978
6011
5979
- /* Whole-pattern recursion is coded as a recurse into group 0, so it
5980
- won't be picked up here. Instead, we catch it when the OP_END is reached.
5981
- Other recursion is handled here. */
6012
+ /* Whole-pattern recursion is coded as a recurse into group 0, and is
6013
+ handled with OP_BRA above. Other recursion is handled here. */
5982
6014
5983
6015
case OP_CBRA :
5984
6016
case OP_CBRAPOS :
0 commit comments