Skip to content

Commit 71ebc1b

Browse files
<regex>: Process generic loops non-recursively (#5798)
Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
1 parent a98226c commit 71ebc1b

File tree

2 files changed: +251 −75 lines changed

2 files changed

+251
-75
lines changed

stl/inc/regex

Lines changed: 135 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -1682,14 +1682,19 @@ enum class _Rx_unwind_ops {
16821682
_Do_nothing,
16831683
_Loop_simple_nongreedy,
16841684
_Loop_simple_greedy,
1685+
_Loop_nongreedy,
1686+
_Loop_greedy,
1687+
_Loop_restore_vals,
16851688
};
16861689

16871690
template <class _BidIt>
16881691
class _Rx_state_frame_t {
16891692
public:
16901693
_Rx_unwind_ops _Code;
1694+
int _Loop_idx_sav;
16911695
_Node_base* _Node;
16921696
_Tgt_state_t<_BidIt> _Match_state;
1697+
size_t _Loop_frame_idx_sav;
16931698
};
16941699

16951700
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
@@ -1816,7 +1821,6 @@ private:
18161821
void _Decrease_stack_usage_count();
18171822
void _Increase_complexity_count();
18181823

1819-
bool _Do_rep(_Node_rep*, bool, int);
18201824
void _Prepare_rep(_Node_rep*);
18211825
bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
18221826
_It _Do_class(_Node_base*, _It);
@@ -3372,7 +3376,7 @@ void _Builder2<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory
33723376
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
33733377
size_t _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Push_frame(_Rx_unwind_ops _Code, _Node_base* _Node) {
33743378
if (_Frames_count >= _Frames.size()) {
3375-
_Frames.push_back({_Code, _Node, _Tgt_state});
3379+
_Frames.push_back({_Code, 0, _Node, _Tgt_state, size_t{}});
33763380
} else {
33773381
auto& _Frame = _Frames[_Frames_count];
33783382
_Frame._Code = _Code;
@@ -3413,74 +3417,6 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_coun
34133417
}
34143418
}
34153419

3416-
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
3417-
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, bool _Greedy, int _Init_idx) {
3418-
// apply repetition
3419-
bool _Matched0 = false;
3420-
_Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
3421-
const int _Loop_idx_sav = _Psav->_Loop_idx;
3422-
const size_t _Loop_frame_idx_sav = _Psav->_Loop_frame_idx;
3423-
const size_t _Frame_idx = _Push_frame();
3424-
const bool _Progress = _Init_idx == 0 || _Frames[_Loop_frame_idx_sav]._Match_state._Cur != _Tgt_state._Cur;
3425-
3426-
if (_Init_idx < _Node->_Min) { // try another required match
3427-
_Psav->_Loop_frame_idx = _Frame_idx;
3428-
_Psav->_Loop_idx = _Progress ? _Init_idx + 1 : _Node->_Min; // try only one more match after an empty match
3429-
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3430-
_Tgt_state._Grp_valid.end(), false);
3431-
_Matched0 = _Match_pat(_Node->_Next);
3432-
} else if (_Init_idx == _Node->_Min || _Progress) {
3433-
if (0 <= _Node->_Max && _Node->_Max <= _Init_idx) {
3434-
_Matched0 = _Match_pat(_Node->_End_rep->_Next); // reps done, try tail
3435-
} else if (_Longest) { // longest, try any number of repetitions
3436-
3437-
// match with no further repetition
3438-
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
3439-
3440-
// try to match with one more repetition
3441-
_Tgt_state = _Frames[_Frame_idx]._Match_state;
3442-
_Psav->_Loop_idx = _Init_idx + 1;
3443-
_Psav->_Loop_frame_idx = _Frame_idx;
3444-
if (_Match_pat(_Node->_Next)) { // always call _Match_pat, even when _Matched0 is already true
3445-
_Matched0 = true;
3446-
}
3447-
} else if (!_Greedy) { // not greedy, favor minimum number of reps
3448-
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
3449-
if (!_Matched0) { // tail failed, try another rep
3450-
_Tgt_state = _Frames[_Frame_idx]._Match_state;
3451-
_Psav->_Loop_idx = _Init_idx + 1;
3452-
_Psav->_Loop_frame_idx = _Frame_idx;
3453-
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3454-
_Tgt_state._Grp_valid.end(), false);
3455-
_Matched0 = _Match_pat(_Node->_Next);
3456-
}
3457-
} else { // greedy, favor maximum number of reps,
3458-
// so try another rep
3459-
_Psav->_Loop_idx = _Init_idx + 1;
3460-
_Psav->_Loop_frame_idx = _Frame_idx;
3461-
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first),
3462-
_Tgt_state._Grp_valid.end(), false);
3463-
_Matched0 = _Match_pat(_Node->_Next);
3464-
3465-
if (!_Matched0) { // rep failed, try tail
3466-
_Psav->_Loop_idx = _Loop_idx_sav;
3467-
_Psav->_Loop_frame_idx = _Loop_frame_idx_sav;
3468-
_Tgt_state = _Frames[_Frame_idx]._Match_state;
3469-
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
3470-
}
3471-
}
3472-
} else if (_Init_idx == 1 && (_Sflags & regex_constants::_Any_posix)) {
3473-
// POSIX allows an empty repetition if the subexpression is matched only once,
3474-
// so try tail
3475-
_Matched0 = _Match_pat(_Node->_End_rep->_Next);
3476-
}
3477-
3478-
_Psav->_Loop_idx = _Loop_idx_sav;
3479-
_Psav->_Loop_frame_idx = _Loop_frame_idx_sav;
3480-
_Pop_frame(_Frame_idx);
3481-
return _Matched0;
3482-
}
3483-
34843420
template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
34853421
void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _Node) {
34863422
_Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];
@@ -4055,9 +3991,9 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
40553991
auto _Node = static_cast<_Node_rep*>(_Nx);
40563992
_Prepare_rep(_Node);
40573993
bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
3994+
auto& _Sav = _Loop_vals[_Node->_Loop_number];
40583995

40593996
if (_Node->_Simple_loop == 1) {
4060-
auto& _Sav = _Loop_vals[_Node->_Loop_number];
40613997
_Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
40623998
_Increase_complexity_count();
40633999
if (_Node->_Min > 0 || (_Greedy && !_Longest && _Node->_Max != 0)) { // try a rep first
@@ -4078,8 +4014,33 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
40784014
}
40794015
}
40804016
} else {
4081-
_Failed = !_Do_rep(_Node, _Greedy, 0);
4082-
_Next = nullptr;
4017+
if (_Node->_Min > 0 || (_Greedy && !_Longest && _Node->_Max != 0)) { // try a rep first
4018+
// set up stack unwinding for greedy matching or loop val restoration
4019+
const auto _Code =
4020+
_Node->_Min == 0 ? _Rx_unwind_ops::_Loop_greedy : _Rx_unwind_ops::_Loop_restore_vals;
4021+
auto _Frame_idx = _Push_frame(_Code, _Node);
4022+
auto& _Frame = _Frames[_Frame_idx];
4023+
_Frame._Loop_idx_sav = _Sav._Loop_idx;
4024+
_Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4025+
_Sav._Loop_idx = 1;
4026+
_Sav._Loop_frame_idx = _Frame_idx;
4027+
_Increase_stack_usage_count();
4028+
// _Next is already assigned correctly for matching a rep
4029+
} else { // try tail first
4030+
_Next = _Node->_End_rep->_Next;
4031+
// set up stack unwinding for non-greedy matching if at least one rep is allowed
4032+
if (_Node->_Max != 0) {
4033+
auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_nongreedy, _Node);
4034+
auto& _Frame = _Frames[_Frame_idx];
4035+
_Frame._Loop_idx_sav = _Sav._Loop_idx;
4036+
_Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4037+
_Sav._Loop_idx = 0;
4038+
_Sav._Loop_frame_idx = _Frame_idx;
4039+
_Increase_stack_usage_count();
4040+
} else {
4041+
_Increase_complexity_count();
4042+
}
4043+
}
40834044
}
40844045
}
40854046

@@ -4128,8 +4089,62 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
41284089
_Increase_complexity_count();
41294090
}
41304091
} else {
4131-
_Failed = !_Do_rep(_Nr, _Greedy, _Sav._Loop_idx);
4132-
_Next = nullptr;
4092+
const bool _Progress = _Frames[_Sav._Loop_frame_idx]._Match_state._Cur != _Tgt_state._Cur;
4093+
if (_Sav._Loop_idx < _Nr->_Min) { // try another required match
4094+
auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_restore_vals, _Nr);
4095+
auto& _Frame = _Frames[_Frame_idx];
4096+
_Frame._Loop_idx_sav = _Sav._Loop_idx;
4097+
_Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4098+
_Sav._Loop_frame_idx = _Frame_idx;
4099+
if (_Progress) {
4100+
++_Sav._Loop_idx;
4101+
} else { // try only one more match after an empty match
4102+
_Sav._Loop_idx = _Nr->_Min;
4103+
}
4104+
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Sav._Group_first),
4105+
_Tgt_state._Grp_valid.end(), false);
4106+
_Next = _Nr->_Next;
4107+
_Increase_stack_usage_count();
4108+
} else if (!_Progress) { // latest rep match empty
4109+
// An empty match is allowed if it is needed to reach the minimum number of reps.
4110+
// Moreover, POSIX allows an empty repetition if the subexpression is matched only once.
4111+
// So try tail in either case, else fail.
4112+
if (_Sav._Loop_idx != _Nr->_Min
4113+
&& !((_Sflags & regex_constants::_Any_posix) && _Sav._Loop_idx == 1)) {
4114+
_Failed = true;
4115+
} else {
4116+
_Increase_complexity_count();
4117+
}
4118+
// _Next is already assigned correctly for matching tail
4119+
} else if (_Greedy && !_Longest && _Sav._Loop_idx != _Nr->_Max) { // one more rep to try next
4120+
// set up stack unwinding for greedy matching
4121+
auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_greedy, _Nr);
4122+
auto& _Frame = _Frames[_Frame_idx];
4123+
_Frame._Loop_idx_sav = _Sav._Loop_idx;
4124+
_Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4125+
_Sav._Loop_frame_idx = _Frame_idx;
4126+
if (_Sav._Loop_idx < INT_MAX) {
4127+
++_Sav._Loop_idx;
4128+
}
4129+
4130+
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Sav._Group_first),
4131+
_Tgt_state._Grp_valid.end(), false);
4132+
_Next = _Nr->_Next;
4133+
_Increase_stack_usage_count();
4134+
} else { // non-greedy matching or greedy matching with maximum reached
4135+
// set up stack unwinding for non-greedy matching if one more rep is allowed
4136+
if (_Sav._Loop_idx != _Nr->_Max) {
4137+
auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Loop_nongreedy, _Nr);
4138+
auto& _Frame = _Frames[_Frame_idx];
4139+
_Frame._Loop_idx_sav = _Sav._Loop_idx;
4140+
_Frame._Loop_frame_idx_sav = _Sav._Loop_frame_idx;
4141+
_Sav._Loop_frame_idx = _Frame_idx;
4142+
_Increase_stack_usage_count();
4143+
} else {
4144+
_Increase_complexity_count();
4145+
}
4146+
// _Next is already assigned correctly for matching tail
4147+
}
41334148
}
41344149
break;
41354150
}
@@ -4249,6 +4264,51 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
42494264
}
42504265
break;
42514266

4267+
case _Rx_unwind_ops::_Loop_greedy:
4268+
// try tail if matching one more rep failed
4269+
if (_Failed) {
4270+
auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4271+
4272+
_Increase_complexity_count();
4273+
_Nx = _Node->_End_rep->_Next;
4274+
_Tgt_state = _Frame._Match_state;
4275+
_Failed = false;
4276+
}
4277+
_FALLTHROUGH;
4278+
4279+
case _Rx_unwind_ops::_Loop_restore_vals:
4280+
{ // restore loop vals after processing of a rep is completed
4281+
auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4282+
auto& _Sav = _Loop_vals[_Node->_Loop_number];
4283+
4284+
_Sav._Loop_idx = _Frame._Loop_idx_sav;
4285+
_Sav._Loop_frame_idx = _Frame._Loop_frame_idx_sav;
4286+
4287+
_Decrease_stack_usage_count();
4288+
}
4289+
break;
4290+
4291+
case _Rx_unwind_ops::_Loop_nongreedy:
4292+
// try another rep if matching tail failed or longest mode
4293+
if (_Failed || _Longest) {
4294+
auto _Node = static_cast<_Node_rep*>(_Frame._Node);
4295+
auto& _Sav = _Loop_vals[_Node->_Loop_number];
4296+
4297+
_Increase_complexity_count();
4298+
_Nx = _Node->_Next;
4299+
_Tgt_state = _Frame._Match_state;
4300+
_STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Sav._Group_first),
4301+
_Tgt_state._Grp_valid.end(), false);
4302+
_Failed = false;
4303+
if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
4304+
++_Sav._Loop_idx;
4305+
}
4306+
4307+
_Frame._Code = _Rx_unwind_ops::_Loop_restore_vals;
4308+
++_Frames_count;
4309+
}
4310+
break;
4311+
42524312
default:
42534313
#if _ITERATOR_DEBUG_LEVEL != 0
42544314
_STL_REPORT_ERROR("internal stack of regex matcher corrupted");

0 commit comments

Comments
 (0)