@@ -1682,14 +1682,19 @@ enum class _Rx_unwind_ops {
16821682 _Do_nothing,
16831683 _Loop_simple_nongreedy,
16841684 _Loop_simple_greedy,
1685+ _Loop_nongreedy,
1686+ _Loop_greedy,
1687+ _Loop_restore_vals,
16851688};
16861689
// One entry of the matcher's frame stack (the elements stored in `_Frames` by `_Push_frame`).
// A frame records an unwind operation together with the state needed to carry it out later.
// `_Push_frame` only fills in `_Code`, `_Node` and `_Match_state` (and zero-initializes the two
// `*_sav` members when it appends a new element); code that needs the `*_sav` members assigns
// them right after pushing the frame.
16871690template <class _BidIt>
16881691class _Rx_state_frame_t {
16891692public:
// Unwind operation to perform for this frame; the unwinding cases dispatch on `_Rx_unwind_ops` values.
16901693 _Rx_unwind_ops _Code;
// Copy of `_Loop_vals[...]._Loop_idx` taken when a loop frame is pushed; written back to the
// loop's `_Loop_idx` by the `_Loop_restore_vals` handling (also reached from `_Loop_greedy`).
1694+ int _Loop_idx_sav;
// Node passed to `_Push_frame`; the loop unwind cases cast it to `_Node_rep*`.
16911695 _Node_base* _Node;
// Snapshot of `_Tgt_state` taken when the frame is pushed. Loop handling assigns it back to
// `_Tgt_state` before retrying, and compares its `_Cur` with the current `_Cur` to detect
// whether the latest repetition consumed any input.
16921696 _Tgt_state_t<_BidIt> _Match_state;
// Copy of `_Loop_vals[...]._Loop_frame_idx` taken when a loop frame is pushed; restored
// together with `_Loop_idx_sav`.
1697+ size_t _Loop_frame_idx_sav;
16931698};
16941699
16951700template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
@@ -1816,7 +1821,6 @@ private:
18161821 void _Decrease_stack_usage_count();
18171822 void _Increase_complexity_count();
18181823
1819- bool _Do_rep(_Node_rep*, bool, int);
18201824 void _Prepare_rep(_Node_rep*);
18211825 bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
18221826 _It _Do_class(_Node_base*, _It);
@@ -3372,7 +3376,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory
33723376template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
33733377size_t _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Push_frame(_Rx_unwind_ops _Code, _Node_base* _Node) {
33743378 if (_Frames_count >= _Frames.size()) {
3375- _Frames.push_back({_Code, _Node, _Tgt_state});
3379+ _Frames.push_back({_Code, 0, _Node, _Tgt_state, size_t{} });
33763380 } else {
33773381 auto& _Frame = _Frames[_Frames_count];
33783382 _Frame._Code = _Code;
@@ -3413,74 +3417,6 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_coun
34133417 }
34143418}
34153419
// NOTE(review): every line of this definition carries the '-' marker, i.e. this diff removes it.
//
// _Do_rep: processes one step of a non-simple repetition by calling `_Match_pat` recursively.
//   _Node     - the repetition node; its `_Loop_number` selects the entry in `_Loop_vals`.
//   _Greedy   - when true (and `_Longest` is false) another repetition is tried before the tail.
//   _Init_idx - repetition count to start from; the call sites visible in this diff pass 0 on
//               entering the loop and `_Sav._Loop_idx` when coming back to it.
// Returns true if one of the attempted `_Match_pat` calls succeeded.
//
// The function saves the loop's `_Loop_idx` / `_Loop_frame_idx` on entry, pushes a frame whose
// `_Match_state` is used to reset `_Tgt_state` between attempts, and restores both loop values
// and pops the frame before returning.
// `_Progress` is true on the first step (`_Init_idx == 0`) or when the current position differs
// from the position stored in the frame recorded for the previous repetition.
3416- template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3417- bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, bool _Greedy, int _Init_idx) {
3418- // apply repetition
3419- bool _Matched0 = false;
3420- _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3421- const int _Loop_idx_sav = _Psav->_Loop_idx;
3422- const size_t _Loop_frame_idx_sav = _Psav->_Loop_frame_idx;
3423- const size_t _Frame_idx = _Push_frame();
3424- const bool _Progress = _Init_idx == 0 || _Frames[_Loop_frame_idx_sav]._Match_state._Cur != _Tgt_state._Cur;
3425-
3426- if (_Init_idx < _Node->_Min) { // try another required match
3427- _Psav->_Loop_frame_idx = _Frame_idx;
3428- _Psav->_Loop_idx = _Progress ? _Init_idx + 1 : _Node->_Min; // try only one more match after an empty match
3429- _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3430- _Tgt_state._Grp_valid.end(), false);
3431- _Matched0 = _Match_pat(_Node->_Next);
3432- } else if (_Init_idx == _Node->_Min || _Progress) {
3433- if (0 <= _Node->_Max && _Node->_Max <= _Init_idx) {
3434- _Matched0 = _Match_pat(_Node->_End_rep->_Next); // reps done, try tail
3435- } else if (_Longest) { // longest, try any number of repetitions
3436-
3437- // match with no further repetition
3438- _Matched0 = _Match_pat(_Node->_End_rep->_Next);
3439-
3440- // try to match with one more repetition
3441- _Tgt_state = _Frames[_Frame_idx]._Match_state;
3442- _Psav->_Loop_idx = _Init_idx + 1;
3443- _Psav->_Loop_frame_idx = _Frame_idx;
3444- if (_Match_pat(_Node->_Next)) { // always call _Match_pat, even when _Matched0 is already true
3445- _Matched0 = true;
3446- }
3447- } else if (!_Greedy) { // not greedy, favor minimum number of reps
3448- _Matched0 = _Match_pat(_Node->_End_rep->_Next);
3449- if (!_Matched0) { // tail failed, try another rep
3450- _Tgt_state = _Frames[_Frame_idx]._Match_state;
3451- _Psav->_Loop_idx = _Init_idx + 1;
3452- _Psav->_Loop_frame_idx = _Frame_idx;
3453- _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3454- _Tgt_state._Grp_valid.end(), false);
3455- _Matched0 = _Match_pat(_Node->_Next);
3456- }
3457- } else { // greedy, favor maximum number of reps,
3458- // so try another rep
3459- _Psav->_Loop_idx = _Init_idx + 1;
3460- _Psav->_Loop_frame_idx = _Frame_idx;
3461- _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3462- _Tgt_state._Grp_valid.end(), false);
3463- _Matched0 = _Match_pat(_Node->_Next);
3464-
3465- if (!_Matched0) { // rep failed, try tail
3466- _Psav->_Loop_idx = _Loop_idx_sav;
3467- _Psav->_Loop_frame_idx = _Loop_frame_idx_sav;
3468- _Tgt_state = _Frames[_Frame_idx]._Match_state;
3469- _Matched0 = _Match_pat(_Node->_End_rep->_Next);
3470- }
3471- }
3472- } else if (_Init_idx == 1 && (_Sflags & regex_constants::_Any_posix)) {
3473- // POSIX allows an empty repetition if the subexpression is matched only once,
3474- // so try tail
3475- _Matched0 = _Match_pat(_Node->_End_rep->_Next);
3476- }
3477-
3478- _Psav->_Loop_idx = _Loop_idx_sav;
3479- _Psav->_Loop_frame_idx = _Loop_frame_idx_sav;
3480- _Pop_frame(_Frame_idx);
3481- return _Matched0;
3482- }
3483-
34843420template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34853421void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _Node) {
34863422 _Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
@@ -4055,9 +3991,9 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
40553991 auto _Node = static_cast<_Node_rep*>(_Nx);
40563992 _Prepare_rep(_Node);
40573993 bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
3994+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
40583995
40593996 if (_Node->_Simple_loop == 1) {
4060- auto& _Sav = _Loop_vals[_Node->_Loop_number];
40613997 _Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
40623998 _Increase_complexity_count();
40633999 if (_Node->_Min > 0 || (_Greedy && !_Longest && _Node->_Max != 0)) { // try a rep first
@@ -4078,8 +4014,33 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
40784014 }
40794015 }
40804016 } else {
4081- _Failed = !_Do_rep(_Node, _Greedy, 0);
4082- _Next = nullptr;
4017+ if (_Node->_Min > 0 || (_Greedy && !_Longest && _Node->_Max != 0)) { // try a rep first
4018+ // set up stack unwinding for greedy matching or loop val restoration
4019+ const auto _Code =
4020+ _Node->_Min == 0 ? _Rx_unwind_ops::_Loop_greedy : _Rx_unwind_ops::_Loop_restore_vals;
4021+ auto _Frame_idx = _Push_frame(_Code, _Node);
4022+ auto& _Frame = _Frames[_Frame_idx];
4023+ _Frame._Loop_idx_sav = _Sav._Loop_idx;
4024+ _Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4025+ _Sav._Loop_idx = 1;
4026+ _Sav._Loop_frame_idx = _Frame_idx;
4027+ _Increase_stack_usage_count();
4028+ // _Next is already assigned correctly for matching a rep
4029+ } else { // try tail first
4030+ _Next = _Node->_End_rep->_Next;
4031+ // set up stack unwinding for non-greedy matching if at least one rep is allowed
4032+ if (_Node->_Max != 0) {
4033+ auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_nongreedy, _Node);
4034+ auto& _Frame = _Frames[_Frame_idx];
4035+ _Frame._Loop_idx_sav = _Sav._Loop_idx;
4036+ _Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4037+ _Sav._Loop_idx = 0;
4038+ _Sav._Loop_frame_idx = _Frame_idx;
4039+ _Increase_stack_usage_count();
4040+ } else {
4041+ _Increase_complexity_count();
4042+ }
4043+ }
40834044 }
40844045 }
40854046
@@ -4128,8 +4089,62 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41284089 _Increase_complexity_count();
41294090 }
41304091 } else {
4131- _Failed = !_Do_rep(_Nr, _Greedy, _Sav._Loop_idx);
4132- _Next = nullptr;
4092+ const bool _Progress = _Frames[_Sav._Loop_frame_idx]._Match_state._Cur != _Tgt_state._Cur;
4093+ if (_Sav._Loop_idx < _Nr->_Min) { // try another required match
4094+ auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_restore_vals, _Nr);
4095+ auto& _Frame = _Frames[_Frame_idx];
4096+ _Frame._Loop_idx_sav = _Sav._Loop_idx;
4097+ _Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4098+ _Sav._Loop_frame_idx = _Frame_idx;
4099+ if (_Progress) {
4100+ ++_Sav._Loop_idx;
4101+ } else { // try only one more match after an empty match
4102+ _Sav._Loop_idx = _Nr->_Min;
4103+ }
4104+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Sav._Group_first),
4105+ _Tgt_state._Grp_valid.end(), false);
4106+ _Next = _Nr->_Next;
4107+ _Increase_stack_usage_count();
4108+ } else if (!_Progress) { // latest rep match empty
4109+ // An empty match is allowed if it is needed to reach the minimum number of reps.
4110+ // Moreover, POSIX allows an empty repetition if the subexpression is matched only once.
4111+ // So try tail in either case, else fail.
4112+ if (_Sav._Loop_idx != _Nr->_Min
4113+ && !((_Sflags & regex_constants::_Any_posix) && _Sav._Loop_idx == 1)) {
4114+ _Failed = true;
4115+ } else {
4116+ _Increase_complexity_count();
4117+ }
4118+ // _Next is already assigned correctly for matching tail
4119+ } else if (_Greedy && !_Longest && _Sav._Loop_idx != _Nr->_Max) { // one more rep to try next
4120+ // set up stack unwinding for greedy matching
4121+ auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_greedy, _Nr);
4122+ auto& _Frame = _Frames[_Frame_idx];
4123+ _Frame._Loop_idx_sav = _Sav._Loop_idx;
4124+ _Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4125+ _Sav._Loop_frame_idx = _Frame_idx;
4126+ if (_Sav._Loop_idx < INT_MAX) {
4127+ ++_Sav._Loop_idx;
4128+ }
4129+
4130+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Sav._Group_first),
4131+ _Tgt_state._Grp_valid.end(), false);
4132+ _Next = _Nr->_Next;
4133+ _Increase_stack_usage_count();
4134+ } else { // non-greedy matching or greedy matching with maximum reached
4135+ // set up stack unwinding for non-greedy matching if one more rep is allowed
4136+ if (_Sav._Loop_idx != _Nr->_Max) {
4137+ auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_nongreedy, _Nr);
4138+ auto& _Frame = _Frames[_Frame_idx];
4139+ _Frame._Loop_idx_sav = _Sav._Loop_idx;
4140+ _Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4141+ _Sav._Loop_frame_idx = _Frame_idx;
4142+ _Increase_stack_usage_count();
4143+ } else {
4144+ _Increase_complexity_count();
4145+ }
4146+ // _Next is already assigned correctly for matching tail
4147+ }
41334148 }
41344149 break;
41354150 }
@@ -4249,6 +4264,51 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42494264 }
42504265 break;
42514266
4267+ case _Rx_unwind_ops::_Loop_greedy:
4268+ // try tail if matching one more rep failed
4269+ if (_Failed) {
4270+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4271+
4272+ _Increase_complexity_count();
4273+ _Nx = _Node->_End_rep->_Next;
4274+ _Tgt_state = _Frame._Match_state;
4275+ _Failed = false;
4276+ }
4277+ _FALLTHROUGH;
4278+
4279+ case _Rx_unwind_ops::_Loop_restore_vals:
4280+ { // restore loop vals after processing of a rep is completed
4281+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4282+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4283+
4284+ _Sav._Loop_idx = _Frame._Loop_idx_sav;
4285+ _Sav._Loop_frame_idx = _Frame._Loop_frame_idx_sav;
4286+
4287+ _Decrease_stack_usage_count();
4288+ }
4289+ break;
4290+
4291+ case _Rx_unwind_ops::_Loop_nongreedy:
4292+ // try another rep if matching tail failed or longest mode
4293+ if (_Failed || _Longest) {
4294+ auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4295+ auto& _Sav = _Loop_vals[_Node->_Loop_number];
4296+
4297+ _Increase_complexity_count();
4298+ _Nx = _Node->_Next;
4299+ _Tgt_state = _Frame._Match_state;
4300+ _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Sav._Group_first),
4301+ _Tgt_state._Grp_valid.end(), false);
4302+ _Failed = false;
4303+ if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
4304+ ++_Sav._Loop_idx;
4305+ }
4306+
4307+ _Frame._Code = _Rx_unwind_ops::_Loop_restore_vals;
4308+ ++_Frames_count;
4309+ }
4310+ break;
4311+
42524312 default:
42534313#if _ITERATOR_DEBUG_LEVEL != 0
42544314 _STL_REPORT_ERROR("internal stack of regex matcher corrupted");
0 commit comments