diff --git a/regex-automata/src/meta/regex.rs b/regex-automata/src/meta/regex.rs
index 8cfdecbec..6bc4bdc71 100644
--- a/regex-automata/src/meta/regex.rs
+++ b/regex-automata/src/meta/regex.rs
@@ -611,7 +611,8 @@ impl Regex {
         &'r self,
         input: I,
     ) -> FindMatches<'r, 'h> {
-        let cache = self.pool.get();
+        let mut cache = self.pool.get();
+        cache.keep_lookaround_state(true);
         let it = iter::Searcher::new(input.into());
         FindMatches { re: self, cache, it }
     }
@@ -652,7 +653,8 @@ impl Regex {
         &'r self,
         input: I,
     ) -> CapturesMatches<'r, 'h> {
-        let cache = self.pool.get();
+        let mut cache = self.pool.get();
+        cache.keep_lookaround_state(true);
         let caps = self.create_captures();
         let it = iter::Searcher::new(input.into());
         CapturesMatches { re: self, cache, caps, it }
@@ -2076,7 +2078,11 @@ impl<'r, 'h> Iterator for FindMatches<'r, 'h> {
     #[inline]
     fn next(&mut self) -> Option<Match> {
         let FindMatches { re, ref mut cache, ref mut it } = *self;
-        it.advance(|input| Ok(re.search_with(cache, input)))
+        let result = it.advance(|input| Ok(re.search_with(cache, input)));
+        if result.is_none() {
+            cache.keep_lookaround_state(false);
+        }
+        result
     }
 
     #[inline]
@@ -2149,6 +2155,7 @@ impl<'r, 'h> Iterator for CapturesMatches<'r, 'h> {
         if caps.is_match() {
             Some(caps.clone())
         } else {
+            cache.keep_lookaround_state(false);
             None
         }
     }
@@ -2385,6 +2392,19 @@ impl Cache {
         re.imp.strat.reset_cache(self)
     }
 
+    /// Configures whether this cache keeps the state of look-behind
+    /// assertions once a match has been found.
+    ///
+    /// Only pass `true` when each following search starts at the end of
+    /// the previously found match. Otherwise, the result of any search
+    /// after this call will most likely be wrong.
+    ///
+    /// Passing `false` clears any previously stored look-behind state.
+    /// The setting is forwarded to the PikeVM cache.
+    pub fn keep_lookaround_state(&mut self, keep: bool) {
+        self.pikevm.keep_lookaround_state(keep);
+    }
+
     /// Returns the heap memory usage, in bytes, of this cache.
     ///
     /// This does **not** include the stack size used up by this cache. To
diff --git a/regex-automata/src/meta/wrappers.rs b/regex-automata/src/meta/wrappers.rs
index f7c5c1096..83f5c12ab 100644
--- a/regex-automata/src/meta/wrappers.rs
+++ b/regex-automata/src/meta/wrappers.rs
@@ -133,6 +133,12 @@ impl PikeVMCache {
         PikeVMCache(Some(builder.get().0.create_cache()))
     }
 
+    pub(crate) fn keep_lookaround_state(&mut self, keep: bool) {
+        if let Some(inner) = &mut self.0 {
+            inner.keep_lookaround_state(keep);
+        }
+    }
+
     pub(crate) fn reset(&mut self, builder: &PikeVM) {
         self.0.as_mut().unwrap().reset(&builder.get().0);
     }
diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs
index 5a7bccd72..42dd32127 100644
--- a/regex-automata/src/nfa/thompson/compiler.rs
+++ b/regex-automata/src/nfa/thompson/compiler.rs
@@ -1038,12 +1038,12 @@ impl Compiler {
 
         let unanchored =
             self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)?;
+        self.builder.borrow_mut().start_look_behind(unanchored.start);
 
         let sub = self.c(lookaround.sub())?;
         let write = self.add_write_lookaround(idx)?;
         self.patch(unanchored.end, sub.start)?;
         self.patch(sub.end, write)?;
-        self.builder.borrow_mut().start_look_behind(unanchored.start);
         Ok(ThompsonRef { start: check, end: check })
     }
 
diff --git a/regex-automata/src/nfa/thompson/pikevm.rs b/regex-automata/src/nfa/thompson/pikevm.rs
index b3e6e45c9..813804884 100644
--- a/regex-automata/src/nfa/thompson/pikevm.rs
+++ b/regex-automata/src/nfa/thompson/pikevm.rs
@@ -891,6 +891,7 @@ impl PikeVM {
         cache: &'c mut Cache,
         input: I,
     ) -> FindMatches<'r, 'c, 'h> {
+        cache.keep_lookaround_state(true);
         let caps = Captures::matches(self.get_nfa().group_info().clone());
         let it = iter::Searcher::new(input.into());
         FindMatches { re: self, cache, caps, it }
@@ -934,6 +935,7 @@ impl PikeVM {
         cache: &'c mut Cache,
         input: I,
     ) -> CapturesMatches<'r, 'c, 'h> {
+        cache.keep_lookaround_state(true);
         let caps = self.create_captures();
         let it = iter::Searcher::new(input.into());
         CapturesMatches { re: self, cache, caps, it }
@@ -1265,42 +1267,50 @@ impl PikeVM {
             ref mut lookaround,
             ref mut curr_lookaround,
             ref mut next_lookaround,
+            ref mut match_lookaround,
+            ref keep_lookaround_state,
         } = cache;
 
-        // This initializes the look-behind threads from the start of the input
-        // Note: since capture groups are not allowed inside look-behinds,
-        // there won't be any Capture epsilon transitions and hence it is ok to
-        // use &mut [] for the slots parameter. We need to add the start states
-        // in reverse because nested look-behinds have a higher index but must
-        // be executed first.
-        for look_behind_start in self.nfa.look_behind_starts() {
-            self.epsilon_closure(
-                stack,
-                &mut [],
-                curr_lookaround,
-                lookaround,
-                input,
-                0,
-                *look_behind_start,
-            );
-        }
+        if let Some(active) = match_lookaround {
+            *curr_lookaround = active.clone();
+        } else if self.lookaround_count() > 0 {
+            // This initializes the look-behind threads from the start of the
+            // input. Note: since capture groups are not allowed inside
+            // look-behinds, there won't be any Capture epsilon transitions and
+            // hence it is ok to use &mut [] for the slots parameter. We need
+            // to add the start states in reverse because more deeply nested
+            // look-behinds have a higher index but must be executed first, so
+            // that the result is available for the outer expression.
+            for look_behind_start in self.nfa.look_behind_starts().iter().rev()
+            {
+                self.epsilon_closure(
+                    stack,
+                    &mut [],
+                    curr_lookaround,
+                    lookaround,
+                    input,
+                    0,
+                    *look_behind_start,
+                );
+            }
 
-        // This brings the look-behind threads into the state they must be for
-        // starting at input.start() instead of the beginning. This is
-        // necessary for look-behinds to be able to match outside of the input
-        // span.
-        for lb_at in 0..input.start() {
-            self.nexts(
-                stack,
-                curr_lookaround,
-                next_lookaround,
-                lookaround,
-                input,
-                lb_at,
-                &mut [],
-            );
-            core::mem::swap(curr_lookaround, next_lookaround);
-            next_lookaround.set.clear();
+            // This brings the look-behind threads into the state they must be
+            // for starting at input.start() instead of the beginning. This is
+            // necessary for look-behinds to be able to match outside of the
+            // input span.
+            for lb_at in 0..input.start() {
+                self.nexts(
+                    stack,
+                    curr_lookaround,
+                    next_lookaround,
+                    lookaround,
+                    input,
+                    lb_at,
+                    &mut [],
+                );
+                core::mem::swap(curr_lookaround, next_lookaround);
+                next_lookaround.set.clear();
+            }
         }
 
         let mut hm = None;
@@ -1428,6 +1438,9 @@ impl PikeVM {
                 self.nexts(stack, curr, next, lookaround, input, at, slots)
             {
                 hm = Some(HalfMatch::new(pid, at));
+                if *keep_lookaround_state {
+                    *match_lookaround = Some(curr_lookaround.clone());
+                }
             }
             // Unless the caller asked us to return early, we need to mush on
             // to see if we can extend our match. (But note that 'nexts' will
@@ -1496,6 +1509,11 @@ impl PikeVM {
             ref mut lookaround,
             ref mut curr_lookaround,
             ref mut next_lookaround,
+            // It makes no sense to keep any look-behind state for this version of
+            // the search, since the caller receives no information about
+            // where the search ended.
+            keep_lookaround_state: _,
+            match_lookaround: _,
         } = cache;
 
         for look_behind_start in self.nfa.look_behind_starts() {
@@ -1989,10 +2007,14 @@ impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
             *self;
         // 'advance' converts errors into panics, which is OK here because
         // the PikeVM can never return an error.
-        it.advance(|input| {
+        let result = it.advance(|input| {
             re.search(cache, input, caps);
             Ok(caps.get_match())
-        })
+        });
+        if result.is_none() {
+            cache.keep_lookaround_state(false);
+        }
+        result
     }
 }
 
@@ -2034,6 +2056,7 @@ impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> {
         if caps.is_match() {
             Some(caps.clone())
         } else {
+            cache.keep_lookaround_state(false);
             None
         }
     }
@@ -2070,6 +2093,14 @@ pub struct Cache {
     curr_lookaround: ActiveStates,
     /// The next set of states to be explored for look-behind subexpressions.
     next_lookaround: ActiveStates,
+    /// The set of active threads, belonging to look-behind expressions,
+    /// when a match was found. This is needed to resume a search after a match
+    /// was found (to look for further matches), without having to re-scan the
+    /// beginning of the haystack.
+    match_lookaround: Option<ActiveStates>,
+    /// When true, use the states of `match_lookaround` to initialize a search,
+    /// otherwise recompute from the beginning of the haystack.
+    keep_lookaround_state: bool,
 }
 
 impl Cache {
@@ -2089,6 +2120,8 @@ impl Cache {
             lookaround: vec![None; re.lookaround_count()],
             curr_lookaround: ActiveStates::new(re),
             next_lookaround: ActiveStates::new(re),
+            match_lookaround: None,
+            keep_lookaround_state: false,
         }
     }
 
@@ -2135,6 +2168,25 @@ impl Cache {
         self.curr_lookaround.reset(re);
         self.next_lookaround.reset(re);
         self.lookaround = vec![None; re.lookaround_count()];
+        self.match_lookaround = None;
+        self.keep_lookaround_state = false;
+    }
+
+    /// Sets whether this cache stores a copy of the active look-behind
+    /// threads whenever a search finds a match.
+    ///
+    /// This is a performance optimization. Only pass `true` when the
+    /// following searches each start at the end of the previously found
+    /// match. Otherwise, the results of look-behind sub-expressions will
+    /// be out of sync with the main regex.
+    ///
+    /// Every call, with either value, discards any previously stored
+    /// look-behind state, so stale threads never seed a new iteration.
+    pub fn keep_lookaround_state(&mut self, keep: bool) {
+        self.keep_lookaround_state = keep;
+        // Always drop stored threads: an iterator abandoned early never passes
+        // `false`, and its state must not seed the next iteration's search.
+        self.match_lookaround = None;
     }
 
     /// Returns the heap memory usage, in bytes, of this cache.
@@ -2143,11 +2195,16 @@ impl Cache {
     /// compute that, use `std::mem::size_of::<Cache>()`.
     pub fn memory_usage(&self) -> usize {
         use core::mem::size_of;
+        let match_lookaround_memory = self
+            .match_lookaround
+            .as_ref()
+            .map_or(0, |states| states.memory_usage());
         (self.stack.len() * size_of::<FollowEpsilon>())
             + self.curr.memory_usage()
             + self.next.memory_usage()
             + self.curr_lookaround.memory_usage()
             + self.next_lookaround.memory_usage()
+            + match_lookaround_memory
     }
 
     /// Clears this cache. This should be called at the start of every search