@@ -1586,7 +1586,7 @@ public:
15861586 _Loop_number(_Number) {}
15871587
15881588 const int _Min;
1589- const int _Max;
1589+ const int _Max; // non-negative if bounded, -1 if unbounded
15901590 _Node_end_rep* _End_rep;
15911591 unsigned int _Loop_number;
15921592 int _Simple_loop = -1; // -1 undetermined, 0 contains if/do, 1 simple
@@ -1680,6 +1680,7 @@ enum class _Rx_unwind_ops {
16801680 _Disjunction_eval_alt_on_failure,
16811681 _Disjunction_eval_alt_always,
16821682 _Do_nothing,
1683+ _Loop_simple_nongreedy,
16831684};
16841685
16851686template <class _BidIt>
@@ -1814,7 +1815,7 @@ private:
18141815 void _Decrease_stack_usage_count();
18151816 void _Increase_complexity_count();
18161817
1817- bool _Do_rep0(_Node_rep*, bool );
1818+ bool _Do_rep0(_Node_rep*);
18181819 bool _Do_rep(_Node_rep*, bool, int);
18191820 void _Prepare_rep(_Node_rep*);
18201821 bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
@@ -3413,22 +3414,18 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_coun
34133414}
34143415
34153416template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3416- bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node, bool _Greedy ) {
3417+ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node) {
34173418 // apply repetition to loop with no nested if/do
34183419 int _Ix = _Node->_Min;
34193420 const size_t _Frame_idx = _Loop_vals[_Node->_Loop_number]._Loop_frame_idx;
3420- _Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 1 ;
3421+ _Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 2 ;
34213422
34223423 _Tgt_state_t<_It> _Final;
34233424 bool _Matched0 = false;
34243425 _It _Saved_pos = _Tgt_state._Cur;
34253426 bool _Done = false;
34263427
34273428 if (_Match_pat(_Node->_End_rep->_Next)) {
3428- if (!_Greedy) {
3429- return true; // go with current match
3430- }
3431-
34323429 // record an acceptable match and continue
34333430 _Final = _Tgt_state;
34343431 _Matched0 = true;
@@ -3449,10 +3446,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34493446 } else {
34503447 _Saved_pos = _Tgt_state._Cur;
34513448 if (_Match_pat(_Node->_End_rep->_Next)) {
3452- if (!_Greedy) {
3453- return true; // go with current match
3454- }
3455-
34563449 // record match and continue
34573450 _Final = _Tgt_state;
34583451 _Matched0 = true;
@@ -3472,10 +3465,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
34723465 // since loop is branchless, empty rep match is not possible at this point
34733466 _Saved_pos = _Tgt_state._Cur;
34743467 if (_Match_pat(_Node->_End_rep->_Next)) {
3475- if (!_Greedy) {
3476- return true; // go with current match
3477- }
3478-
34793468 // record match and continue
34803469 _Final = _Tgt_state;
34813470 _Matched0 = true;
@@ -4135,13 +4124,25 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41354124
41364125 if (_Node->_Simple_loop == 1) {
41374126 auto& _Sav = _Loop_vals[_Node->_Loop_number];
4138- _Sav._Loop_idx = 1;
41394127 _Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
4140- if (_Node->_Min == 0) {
4141- _Failed = !_Do_rep0(_Node, _Greedy);
4142- _Next = nullptr;
4143- } else {
4128+ if (_Node->_Min > 0) { // try to match a rep
41444129 _Increase_complexity_count();
4130+ _Sav._Loop_idx = 1;
4131+ // _Next is already assigned correctly for matching a rep
4132+ } else if (!_Greedy || _Longest) { // non-greedy matching
4133+ _Increase_complexity_count();
4134+
4135+ // try tail first
4136+ _Sav._Loop_idx = 0;
4137+ _Next = _Node->_End_rep->_Next;
4138+
4139+ // set up stack unwinding for non-greedy matching if at least one rep is allowed
4140+ if (_Node->_Max != 0) {
4141+ _Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Node);
4142+ }
4143+ } else {
4144+ _Failed = !_Do_rep0(_Node);
4145+ _Next = nullptr;
41454146 }
41464147 } else {
41474148 _Failed = !_Do_rep(_Node, _Greedy, 0);
@@ -4155,29 +4156,41 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41554156 {
41564157 _Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
41574158 auto& _Sav = _Loop_vals[_Nr->_Loop_number];
4159+ bool _Greedy = (_Nr->_Flags & _Fl_greedy) != 0;
41584160 if (_Nr->_Simple_loop != 0) {
4159- if (_Sav._Loop_idx <= _Nr->_Min) {
4160- if (_Sav._Loop_idx == 1
4161- && _Tgt_state._Cur == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // match empty
4162- // loop is branchless, so it will only ever match empty strings
4163- // -> skip all other matches as they don't change state and immediately try tail
4161+ if (_Sav._Loop_idx == 1
4162+ && _Tgt_state._Cur
4163+ == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // initial match empty
4164+ // loop is branchless, so it will only ever match empty strings
4165+ // -> we only try tail for POSIX or if minimum number of reps is non-zero
4166+ if ((_Sflags & regex_constants::_Any_posix) || _Nr->_Min > 0) {
41644167 _Increase_complexity_count();
41654168 // _Next is already assigned correctly for matching tail
4166- } else if (_Sav._Loop_idx < _Nr->_Min) { // needs at least one more rep to reach minimum
4167- _Increase_complexity_count();
4168- // GH-5365: We have to reset the capture groups from the second iteration on.
4169- _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4170- _Next = _Nr->_Next;
4171- ++_Sav._Loop_idx;
4172- } else { // minimum number of reps reached
4173- _Failed = !_Do_rep0(_Nr, (_Nr->_Flags & _Fl_greedy) != 0);
4174- _Next = nullptr;
4169+ } else {
4170+ _Failed = true;
41754171 }
4172+ } else if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
4173+ _Increase_complexity_count();
4174+
4175+ _Next = _Nr->_Next;
4176+ // GH-5365: We have to reset the capture groups from the second iteration on.
4177+ _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4178+ ++_Sav._Loop_idx;
4179+ } else if (_Longest || !_Greedy) {
4180+ _Increase_complexity_count();
4181+ // set up stack unwinding for non-greedy matching if one more rep is allowed
4182+ if (_Sav._Loop_idx != _Nr->_Max) {
4183+ _Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Nr);
4184+ }
4185+ // _Next is already assigned correctly for matching tail
4186+ } else if (_Sav._Loop_idx == _Nr->_Min) { // greedy and minimum number of reps reached
4187+ _Failed = !_Do_rep0(_Nr);
4188+ _Next = nullptr;
41764189 } else { // internal _Match_pat(_Node->_Next) call in _Do_rep0()
41774190 _Next = nullptr;
41784191 }
41794192 } else {
4180- _Failed = !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0 , _Sav._Loop_idx);
4193+ _Failed = !_Do_rep(_Nr, _Greedy , _Sav._Loop_idx);
41814194 _Next = nullptr;
41824195 }
41834196 break;
@@ -4267,6 +4280,23 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42674280 case _Rx_unwind_ops::_Do_nothing:
42684281 break;
42694282
4283+ case _Rx_unwind_ops::_Loop_simple_nongreedy:
4284+ // try one more rep after matching tail if necessary
4285+ if (_Longest || _Failed) {
4286+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4287+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4288+
4289+ _Increase_complexity_count();
4290+ _Nx = _Node->_Next;
4291+ _Tgt_state._Cur = _Frame._Match_state._Cur;
4292+ _Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
4293+ _Failed = false;
4294+ if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
4295+ ++_Sav._Loop_idx;
4296+ }
4297+ }
4298+ break;
4299+
42704300 default:
42714301#if _ITERATOR_DEBUG_LEVEL != 0
42724302 _STL_REPORT_ERROR("internal stack of regex matcher corrupted");
0 commit comments