@@ -891,6 +891,7 @@ impl PikeVM {
891
891
cache : & ' c mut Cache ,
892
892
input : I ,
893
893
) -> FindMatches < ' r , ' c , ' h > {
894
+ cache. keep_lookaround_state ( true ) ;
894
895
let caps = Captures :: matches ( self . get_nfa ( ) . group_info ( ) . clone ( ) ) ;
895
896
let it = iter:: Searcher :: new ( input. into ( ) ) ;
896
897
FindMatches { re : self , cache, caps, it }
@@ -934,6 +935,7 @@ impl PikeVM {
934
935
cache : & ' c mut Cache ,
935
936
input : I ,
936
937
) -> CapturesMatches < ' r , ' c , ' h > {
938
+ cache. keep_lookaround_state ( true ) ;
937
939
let caps = self . create_captures ( ) ;
938
940
let it = iter:: Searcher :: new ( input. into ( ) ) ;
939
941
CapturesMatches { re : self , cache, caps, it }
@@ -1265,42 +1267,48 @@ impl PikeVM {
1265
1267
ref mut lookaround,
1266
1268
ref mut curr_lookaround,
1267
1269
ref mut next_lookaround,
1270
+ ref mut match_lookaround,
1271
+ ref keep_lookaround_state,
1268
1272
} = cache;
1269
1273
1270
- // This initializes the look-behind threads from the start of the input
1271
- // Note: since capture groups are not allowed inside look-behinds,
1272
- // there won't be any Capture epsilon transitions and hence it is ok to
1273
- // use &mut [] for the slots parameter. We need to add the start states
1274
- // in reverse because nested look-behinds have a higher index but must
1275
- // be executed first.
1276
- for look_behind_start in self . nfa . look_behind_starts ( ) {
1277
- self . epsilon_closure (
1278
- stack,
1279
- & mut [ ] ,
1280
- curr_lookaround,
1281
- lookaround,
1282
- input,
1283
- 0 ,
1284
- * look_behind_start,
1285
- ) ;
1286
- }
1274
+ if let Some ( active) = match_lookaround {
1275
+ * curr_lookaround = active. clone ( ) ;
1276
+ } else {
1277
+ // This initializes the look-behind threads from the start of the input.
1278
+ // Note: since capture groups are not allowed inside look-behinds,
1279
+ // there won't be any Capture epsilon transitions and hence it is ok to
1280
+ // use &mut [] for the slots parameter. We need to add the start states
1281
+ // in reverse because nested look-behinds have a higher index but must
1282
+ // be executed first.
1283
+ for look_behind_start in self . nfa . look_behind_starts ( ) {
1284
+ self . epsilon_closure (
1285
+ stack,
1286
+ & mut [ ] ,
1287
+ curr_lookaround,
1288
+ lookaround,
1289
+ input,
1290
+ 0 ,
1291
+ * look_behind_start,
1292
+ ) ;
1293
+ }
1287
1294
1288
- // This brings the look-behind threads into the state they must be for
1289
- // starting at input.start() instead of the beginning. This is
1290
- // necessary for look-behinds to be able to match outside of the input
1291
- // span.
1292
- for lb_at in 0 ..input. start ( ) {
1293
- self . nexts (
1294
- stack,
1295
- curr_lookaround,
1296
- next_lookaround,
1297
- lookaround,
1298
- input,
1299
- lb_at,
1300
- & mut [ ] ,
1301
- ) ;
1302
- core:: mem:: swap ( curr_lookaround, next_lookaround) ;
1303
- next_lookaround. set . clear ( ) ;
1295
+ // This brings the look-behind threads into the state they must be for
1296
+ // starting at input.start() instead of the beginning. This is
1297
+ // necessary for look-behinds to be able to match outside of the input
1298
+ // span.
1299
+ for lb_at in 0 ..input. start ( ) {
1300
+ self . nexts (
1301
+ stack,
1302
+ curr_lookaround,
1303
+ next_lookaround,
1304
+ lookaround,
1305
+ input,
1306
+ lb_at,
1307
+ & mut [ ] ,
1308
+ ) ;
1309
+ core:: mem:: swap ( curr_lookaround, next_lookaround) ;
1310
+ next_lookaround. set . clear ( ) ;
1311
+ }
1304
1312
}
1305
1313
1306
1314
let mut hm = None ;
@@ -1428,6 +1436,9 @@ impl PikeVM {
1428
1436
self . nexts ( stack, curr, next, lookaround, input, at, slots)
1429
1437
{
1430
1438
hm = Some ( HalfMatch :: new ( pid, at) ) ;
1439
+ if * keep_lookaround_state {
1440
+ * match_lookaround = Some ( curr_lookaround. clone ( ) ) ;
1441
+ }
1431
1442
}
1432
1443
// Unless the caller asked us to return early, we need to mush on
1433
1444
// to see if we can extend our match. (But note that 'nexts' will
@@ -1496,6 +1507,10 @@ impl PikeVM {
1496
1507
ref mut lookaround,
1497
1508
ref mut curr_lookaround,
1498
1509
ref mut next_lookaround,
1510
+ // It makes no sense to keep any look-behind state for this version of
1511
+ // the search, since the caller receives no information about
1512
+ // where the search ended.
1513
+ ..
1499
1514
} = cache;
1500
1515
1501
1516
for look_behind_start in self . nfa . look_behind_starts ( ) {
@@ -1989,10 +2004,14 @@ impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
1989
2004
* self ;
1990
2005
// 'advance' converts errors into panics, which is OK here because
1991
2006
// the PikeVM can never return an error.
1992
- it. advance ( |input| {
2007
+ let result = it. advance ( |input| {
1993
2008
re. search ( cache, input, caps) ;
1994
2009
Ok ( caps. get_match ( ) )
1995
- } )
2010
+ } ) ;
2011
+ if result. is_none ( ) {
2012
+ cache. keep_lookaround_state ( false ) ;
2013
+ }
2014
+ result
1996
2015
}
1997
2016
}
1998
2017
@@ -2034,6 +2053,7 @@ impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> {
2034
2053
if caps. is_match ( ) {
2035
2054
Some ( caps. clone ( ) )
2036
2055
} else {
2056
+ cache. keep_lookaround_state ( false ) ;
2037
2057
None
2038
2058
}
2039
2059
}
@@ -2070,6 +2090,12 @@ pub struct Cache {
2070
2090
curr_lookaround : ActiveStates ,
2071
2091
/// The next set of states to be explored for look-behind subexpressions.
2072
2092
next_lookaround : ActiveStates ,
2093
+ /// The active set of states when a match was found. This is needed
2094
+ /// to resume a search without recomputing look-behind subexpressions.
2095
+ match_lookaround : Option < ActiveStates > ,
2096
+ /// When true, use the states of `match_lookaround` to initialize a search,
2097
+ /// otherwise recompute from the beginning of the haystack.
2098
+ keep_lookaround_state : bool ,
2073
2099
}
2074
2100
2075
2101
impl Cache {
@@ -2089,6 +2115,8 @@ impl Cache {
2089
2115
lookaround : vec ! [ None ; re. lookaround_count( ) ] ,
2090
2116
curr_lookaround : ActiveStates :: new ( re) ,
2091
2117
next_lookaround : ActiveStates :: new ( re) ,
2118
+ match_lookaround : None ,
2119
+ keep_lookaround_state : false ,
2092
2120
}
2093
2121
}
2094
2122
@@ -2135,6 +2163,24 @@ impl Cache {
2135
2163
self . curr_lookaround . reset ( re) ;
2136
2164
self . next_lookaround . reset ( re) ;
2137
2165
self . lookaround = vec ! [ None ; re. lookaround_count( ) ] ;
2166
+ self . match_lookaround = None ;
2167
+ self . keep_lookaround_state = false ;
2168
+ }
2169
+
2170
+ /// Set this cache to keep the state of look-behind assertions upon a
2171
+ /// match being found.
2172
+ ///
2173
+ /// This must only be called with a value of `true` when a new search is
2174
+ /// started at the end of a previously found match, otherwise the result
2175
+ /// of any search after this call will most likely be wrong.
2176
+ ///
2177
+ /// Calling this function with a value of `false` will clear any previously
2178
+ /// stored look-behind state.
2179
+ pub fn keep_lookaround_state ( & mut self , keep : bool ) {
2180
+ self . keep_lookaround_state = keep;
2181
+ if !keep {
2182
+ self . match_lookaround = None ;
2183
+ }
2138
2184
}
2139
2185
2140
2186
/// Returns the heap memory usage, in bytes, of this cache.
@@ -2143,11 +2189,16 @@ impl Cache {
2143
2189
/// compute that, use `std::mem::size_of::<Cache>()`.
2144
2190
pub fn memory_usage ( & self ) -> usize {
2145
2191
use core:: mem:: size_of;
2192
+ let match_lookaround_memory = match & self . match_lookaround {
2193
+ Some ( ml) => ml. memory_usage ( ) ,
2194
+ None => 0 ,
2195
+ } ;
2146
2196
( self . stack . len ( ) * size_of :: < FollowEpsilon > ( ) )
2147
2197
+ self . curr . memory_usage ( )
2148
2198
+ self . next . memory_usage ( )
2149
2199
+ self . curr_lookaround . memory_usage ( )
2150
2200
+ self . next_lookaround . memory_usage ( )
2201
+ + match_lookaround_memory
2151
2202
}
2152
2203
2153
2204
/// Clears this cache. This should be called at the start of every search
0 commit comments