@@ -1681,6 +1681,7 @@ enum class _Rx_unwind_ops {
16811681 _Disjunction_eval_alt_always,
16821682 _Do_nothing,
16831683 _Loop_simple_nongreedy,
1684+ _Loop_simple_greedy,
16841685};
16851686
16861687template <class _BidIt>
@@ -1815,7 +1816,6 @@ private:
18151816 void _Decrease_stack_usage_count();
18161817 void _Increase_complexity_count();
18171818
1818- bool _Do_rep0(_Node_rep*);
18191819 bool _Do_rep(_Node_rep*, bool, int);
18201820 void _Prepare_rep(_Node_rep*);
18211821 bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
@@ -3413,72 +3413,6 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_coun
34133413 }
34143414}
34153415
3416- template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3417- bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node) {
3418- // apply repetition to loop with no nested if/do
3419- int _Ix = _Node->_Min;
3420- const size_t _Frame_idx = _Loop_vals[_Node->_Loop_number]._Loop_frame_idx;
3421- _Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 2;
3422-
3423- _Tgt_state_t<_It> _Final;
3424- bool _Matched0 = false;
3425- _It _Saved_pos = _Tgt_state._Cur;
3426- bool _Done = false;
3427-
3428- if (_Match_pat(_Node->_End_rep->_Next)) {
3429- // record an acceptable match and continue
3430- _Final = _Tgt_state;
3431- _Matched0 = true;
3432- }
3433-
3434- if (_Ix == 0 && _Node->_Max != 0) {
3435- _Tgt_state._Cur = _Saved_pos;
3436- _Tgt_state._Grp_valid = _Frames[_Frame_idx]._Match_state._Grp_valid;
3437-
3438- if (!_Match_pat(_Node->_Next)) { // rep match failed, we are done
3439- _Done = true;
3440- } else if (_Saved_pos == _Tgt_state._Cur) { // match empty, try no more repetitions
3441- _Done = true;
3442- // we only potentially accept/try tail for POSIX
3443- if ((_Sflags & regex_constants::_Any_posix) && _Match_pat(_Node->_End_rep->_Next)) {
3444- return true; // go with current match
3445- }
3446- } else {
3447- _Saved_pos = _Tgt_state._Cur;
3448- if (_Match_pat(_Node->_End_rep->_Next)) {
3449- // record match and continue
3450- _Final = _Tgt_state;
3451- _Matched0 = true;
3452- }
3453- }
3454- _Ix = 1;
3455- }
3456-
3457- if (!_Done) {
3458- while (_Node->_Max == -1 || _Ix++ < _Node->_Max) { // try another rep/tail match
3459- _Tgt_state._Cur = _Saved_pos;
3460- _Tgt_state._Grp_valid = _Frames[_Frame_idx]._Match_state._Grp_valid;
3461- if (!_Match_pat(_Node->_Next) || _Tgt_state._Cur == _Saved_pos) {
3462- break; // rep match failed, quit loop
3463- }
3464-
3465- // since loop is branchless, empty rep match is not possible at this point
3466- _Saved_pos = _Tgt_state._Cur;
3467- if (_Match_pat(_Node->_End_rep->_Next)) {
3468- // record match and continue
3469- _Final = _Tgt_state;
3470- _Matched0 = true;
3471- }
3472- }
3473- }
3474-
3475- if (_Matched0) { // record final match
3476- _Tgt_state = _Final;
3477- }
3478-
3479- return _Matched0;
3480- }
3481-
34823416template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34833417bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, bool _Greedy, int _Init_idx) {
34843418 // apply repetition
@@ -4117,32 +4051,31 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41174051 break;
41184052
41194053 case _N_rep:
4120- {
4054+ { // handle start of loop
41214055 auto _Node = static_cast<_Node_rep*>(_Nx);
41224056 _Prepare_rep(_Node);
41234057 bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
41244058
41254059 if (_Node->_Simple_loop == 1) {
41264060 auto& _Sav = _Loop_vals[_Node->_Loop_number];
41274061 _Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
4128- if (_Node->_Min > 0) { // try to match a rep
4129- _Increase_complexity_count();
4062+ _Increase_complexity_count();
4063+ if (_Node->_Min > 0 || (_Greedy && !_Longest && _Node->_Max != 0)) { // try a rep first
41304064 _Sav._Loop_idx = 1;
41314065 // _Next is already assigned correctly for matching a rep
4132- } else if (!_Greedy || _Longest) { // non-greedy matching
4133- _Increase_complexity_count();
41344066
4135- // try tail first
4067+ // set up stack unwinding for greedy matching if zero reps are allowed
4068+ if (_Node->_Min == 0) {
4069+ _Push_frame(_Rx_unwind_ops::_Loop_simple_greedy, _Node);
4070+ }
4071+ } else { // try tail first
41364072 _Sav._Loop_idx = 0;
41374073 _Next = _Node->_End_rep->_Next;
41384074
41394075 // set up stack unwinding for non-greedy matching if at least one rep is allowed
41404076 if (_Node->_Max != 0) {
41414077 _Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Node);
41424078 }
4143- } else {
4144- _Failed = !_Do_rep0(_Node);
4145- _Next = nullptr;
41464079 }
41474080 } else {
41484081 _Failed = !_Do_rep(_Node, _Greedy, 0);
@@ -4153,7 +4086,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41534086 break;
41544087
41554088 case _N_end_rep:
4156- {
4089+ { // handle end of loop
41574090 _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
41584091 auto& _Sav = _Loop_vals[_Nr->_Loop_number];
41594092 bool _Greedy = (_Nr->_Flags & _Fl_greedy) != 0;
@@ -4163,31 +4096,36 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41634096 == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // initial match empty
41644097 // loop is branchless, so it will only ever match empty strings
41654098 // -> we only try tail for POSIX or if minimum number of reps is non-zero
4166- if ((_Sflags & regex_constants::_Any_posix) || _Nr->_Min > 0) {
4167- _Increase_complexity_count();
4168- // _Next is already assigned correctly for matching tail
4169- } else {
4099+ // _Next is already assigned correctly for matching tail
4100+
4101+ if (!(_Sflags & regex_constants::_Any_posix) && _Nr->_Min == 0) {
41704102 _Failed = true;
41714103 }
41724104 } else if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
4173- _Increase_complexity_count();
4174-
41754105 _Next = _Nr->_Next;
41764106 // GH-5365: We have to reset the capture groups from the second iteration on.
41774107 _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
41784108 ++_Sav._Loop_idx;
4179- } else if (_Longest || !_Greedy) {
4180- _Increase_complexity_count();
4109+ } else if (_Greedy && !_Longest && _Sav._Loop_idx != _Nr->_Max) { // one more rep to try next
4110+ // set up stack unwinding for greedy matching
4111+ _Push_frame(_Rx_unwind_ops::_Loop_simple_greedy, _Nr);
4112+
4113+ _Next = _Nr->_Next;
4114+ // GH-5365: We have to reset the capture groups from the second iteration on.
4115+ _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4116+ if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
4117+ ++_Sav._Loop_idx;
4118+ }
4119+ } else { // non-greedy matching or greedy matching with maximum reached
41814120 // set up stack unwinding for non-greedy matching if one more rep is allowed
41824121 if (_Sav._Loop_idx != _Nr->_Max) {
41834122 _Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Nr);
41844123 }
41854124 // _Next is already assigned correctly for matching tail
4186- } else if (_Sav._Loop_idx == _Nr->_Min) { // greedy and minimum number of reps reached
4187- _Failed = !_Do_rep0(_Nr);
4188- _Next = nullptr;
4189- } else { // internal _Match_pat(_Node->_Next) call in _Do_rep0()
4190- _Next = nullptr;
4125+ }
4126+
4127+ if (!_Failed) {
4128+ _Increase_complexity_count();
41914129 }
41924130 } else {
41934131 _Failed = !_Do_rep(_Nr, _Greedy, _Sav._Loop_idx);
@@ -4297,6 +4235,20 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42974235 }
42984236 break;
42994237
4238+ case _Rx_unwind_ops::_Loop_simple_greedy:
4239+ // try tail if matching one more rep failed
4240+ if (_Failed) {
4241+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4242+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4243+
4244+ _Increase_complexity_count();
4245+ _Nx = _Node->_End_rep->_Next;
4246+ _Tgt_state._Cur = _Frame._Match_state._Cur;
4247+ _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4248+ _Failed = false;
4249+ }
4250+ break;
4251+
43004252 default:
43014253#if _ITERATOR_DEBUG_LEVEL != 0
43024254 _STL_REPORT_ERROR("internal stack of regex matcher corrupted");
@@ -5299,7 +5251,7 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity(
52995251 break;
53005252 case _N_rep:
53015253 // _Node_rep inside another _Node_rep makes both not simple if _Outer_rep can be repeated more than once
5302- // because _Matcher3::_Do_rep0() does not reset capture group boundaries when control is returned to it.
5254+ // because the matcher does not reset capture group boundaries when handling simple loops.
53035255 // If _Outer_rep can repeat at most once, we have to analyze the structure of the inner loop.
53045256 if (_Outer_rep) {
53055257 _Outer_rep->_Simple_loop = 0;
0 commit comments