Skip to content

Commit 3f76681

Browse files
authored
<regex>: Skip initial NFA nodes that do nothing during matching (#6026)
1 parent 23b2324 commit 3f76681

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

stl/inc/regex

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1701,7 +1701,7 @@ class _Matcher3 { // provides ways to match a regular expression to a text seque
17011701
public:
17021702
_Matcher3(_It _Pfirst, _It _Plast, const _RxTraits& _Tr, _Root_node* _Re, unsigned int _Nx,
17031703
regex_constants::syntax_option_type _Sf, regex_constants::match_flag_type _Mf)
1704-
: _Begin(_Pfirst), _End(_Plast), _Rep(_Re), _Sflags(_Sf), _Mflags(_Mf), _Ncap(_Nx),
1704+
: _Begin(_Pfirst), _End(_Plast), _Sflags(_Sf), _Mflags(_Mf), _Ncap(_Nx),
17051705
_Longest((_Re->_Flags & _Fl_longest) && !(_Mf & regex_constants::match_any)), _Traits(_Tr) {
17061706
_Loop_vals.resize(_Re->_Loops);
17071707
_Adl_verify_range(_Pfirst, _Plast);
@@ -1721,6 +1721,12 @@ public:
17211721
_Frames_limit = _Calculate_frames_limit(_Input_length);
17221722
_Complexity_limit = _Calculate_complexity_limit(_Input_length);
17231723

1724+
// TRANSITION, ABI, GH-6025:
1725+
// The first two nodes are of types _N_begin and _N_capture with capturing group 0.
1726+
// These nodes do not affect the state of the matcher and thus can be skipped immediately
1727+
// before engaging the expensive NFA interpreter loop.
1728+
_Start = _Re->_Next->_Next;
1729+
17241730
// sanitize multiline mode setting
17251731
#if _REGEX_LEGACY_MULTILINE_MODE
17261732
_Sflags |= regex_constants::multiline; // old matcher applied multiline mode for all grammars
@@ -1764,7 +1770,7 @@ public:
17641770

17651771
_Matched = false;
17661772

1767-
bool _Succeeded = _Match_pat(_Rep) || _Matched;
1773+
bool _Succeeded = _Match_pat(_Start) || _Matched;
17681774

17691775
if (!_Succeeded) {
17701776
return false;
@@ -1833,7 +1839,7 @@ private:
18331839

18341840
_It _Begin;
18351841
_It _End;
1836-
_Node_base* _Rep;
1842+
_Node_base* _Start;
18371843
regex_constants::syntax_option_type _Sflags;
18381844
regex_constants::match_flag_type _Mflags;
18391845
bool _Matched = false;
@@ -3998,14 +4004,13 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
39984004
{ // record current position
39994005
auto _Node = static_cast<_Node_capture*>(_Nx);
40004006
auto _Idx = _Node->_Idx;
4001-
if (_Idx != 0U) {
4002-
auto& _Group = _Tgt_state._Grps[_Idx];
4003-
auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Capture_restore_begin, _Node);
4004-
auto& _Frame = _Frames[_Frame_idx];
4005-
_Frame._Pos = _Group._Begin;
4006-
_Frame._Capture_idx = _Idx;
4007-
_Group._Begin = _Tgt_state._Cur;
4008-
}
4007+
_STL_INTERNAL_CHECK(_Idx != 0U);
4008+
auto& _Group = _Tgt_state._Grps[_Idx];
4009+
auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Capture_restore_begin, _Node);
4010+
auto& _Frame = _Frames[_Frame_idx];
4011+
_Frame._Pos = _Group._Begin;
4012+
_Frame._Capture_idx = _Idx;
4013+
_Group._Begin = _Tgt_state._Cur;
40094014
break;
40104015
}
40114016

@@ -4471,7 +4476,7 @@ _BidIt _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(
44714476
static constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr),
44724477
static_cast<wchar_t>(_Meta_nl), static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
44734478
constexpr unsigned int _Max_recursion_depth = 50U;
4474-
_Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;
4479+
_Node_base* _Nx = _Node_arg ? _Node_arg : _Start;
44754480

44764481
while (_First_arg != _Last && _Nx) { // check current node
44774482
switch (_Nx->_Kind) { // handle current node's type

0 commit comments

Comments
 (0)