diff --git a/regex-automata/src/nfa/thompson/builder.rs b/regex-automata/src/nfa/thompson/builder.rs
index c769fda23..4f2f9af79 100644
--- a/regex-automata/src/nfa/thompson/builder.rs
+++ b/regex-automata/src/nfa/thompson/builder.rs
@@ -340,6 +340,8 @@ pub struct Builder {
     /// contains a single regex, then `start_pattern[0]` and `start_anchored`
     /// are always equivalent.
     start_pattern: Vec<StateID>,
+    /// The starting states for each individual look-behind sub-expression.
+    start_look_behind: Vec<StateID>,
     /// A map from pattern ID to capture group index to name. (If no name
     /// exists, then a None entry is present. Thus, all capturing groups are
     /// present in this mapping.)
@@ -385,6 +387,7 @@ impl Builder {
         self.pattern_id = None;
         self.states.clear();
         self.start_pattern.clear();
+        self.start_look_behind.clear();
         self.captures.clear();
         self.memory_states = 0;
     }
@@ -449,6 +452,7 @@ impl Builder {
         remap.resize(self.states.len(), StateID::ZERO);
 
         nfa.set_starts(start_anchored, start_unanchored, &self.start_pattern);
+        nfa.set_look_behind_starts(self.start_look_behind.as_slice());
         nfa.set_captures(&self.captures).map_err(BuildError::captures)?;
         // The idea here is to convert our intermediate states to their final
         // form. The only real complexity here is the process of converting
@@ -706,6 +710,12 @@ impl Builder {
         self.start_pattern.len()
     }
 
+    /// Registers `start_id` as the start state of one look-behind
+    /// sub-expression by appending it to this builder's list of such states.
+    pub fn start_look_behind(&mut self, start_id: StateID) {
+        self.start_look_behind.push(start_id);
+    }
+
     /// Add an "empty" NFA state.
     ///
     /// An "empty" NFA state is a state with a single unconditional epsilon
diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs
index 7a9393d1e..5a7bccd72 100644
--- a/regex-automata/src/nfa/thompson/compiler.rs
+++ b/regex-automata/src/nfa/thompson/compiler.rs
@@ -711,11 +711,6 @@ pub struct Compiler {
     /// State used for caching common suffixes when compiling reverse UTF-8
     /// automata (for Unicode character classes).
     utf8_suffix: RefCell<Utf8SuffixMap>,
-    /// Top level alternation state which is used to run all look-around
-    /// assertion checks in lockstep with the main expression. Each look-around
-    /// expression is compiled to a set of states that is patched into this
-    /// state, and this state is updated on each new pattern being compiled.
-    lookaround_alt: RefCell<Option<StateID>>,
     /// The next index to use for a look-around expression.
     lookaround_index: RefCell<SmallIndex>,
 }
@@ -730,7 +725,6 @@ impl Compiler {
             utf8_state: RefCell::new(Utf8State::new()),
             trie_state: RefCell::new(RangeTrie::new()),
             utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)),
-            lookaround_alt: RefCell::new(None),
             lookaround_index: RefCell::new(SmallIndex::ZERO),
         }
     }
@@ -993,32 +987,11 @@ impl Compiler {
 
         let compiled = self.c_alt_iter(exprs.iter().map(|e| {
             let _ = self.start_pattern()?;
-            let has_lookarounds =
-                (e.borrow() as &Hir).properties().contains_lookaround_expr();
-            let mut top_level_alt = if has_lookarounds {
-                self.add_union()?
-            } else {
-                StateID::ZERO
-            };
-            if has_lookarounds {
-                let lookaround_prefix =
-                    self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)?;
-                let lookaround_alt = self.add_union()?;
-                self.patch(lookaround_prefix.end, lookaround_alt)?;
-                self.patch(top_level_alt, lookaround_prefix.start)?;
-                self.lookaround_alt.borrow_mut().replace(lookaround_alt);
-            }
             let one = self.c_cap(0, None, e.borrow())?;
             let match_state_id = self.add_match()?;
             self.patch(one.end, match_state_id)?;
-            if has_lookarounds {
-                self.patch(top_level_alt, one.start)?;
-            } else {
-                top_level_alt = one.start;
-            }
-            let _ = self.finish_pattern(top_level_alt)?;
-            self.lookaround_alt.borrow_mut().take();
-            Ok(ThompsonRef { start: top_level_alt, end: match_state_id })
+            let _ = self.finish_pattern(one.start)?;
+            Ok(ThompsonRef { start: one.start, end: match_state_id })
         }))?;
         self.patch(unanchored_prefix.end, compiled.start)?;
         let nfa = self
@@ -1052,25 +1025,25 @@ impl Compiler {
         &self,
         lookaround: &LookAround,
     ) -> Result<ThompsonRef, BuildError> {
-        let sub = self.c(lookaround.sub())?;
-        let pos = match lookaround {
-            LookAround::NegativeLookBehind(_) => false,
-            LookAround::PositiveLookBehind(_) => true,
-        };
         let idx = *self.lookaround_index.borrow();
         *self.lookaround_index.borrow_mut() = SmallIndex::new(idx.one_more())
             .map_err(|e| {
                 BuildError::too_many_lookarounds(e.attempted() as usize)
             })?;
+        let pos = match lookaround {
+            LookAround::NegativeLookBehind(_) => false,
+            LookAround::PositiveLookBehind(_) => true,
+        };
         let check = self.add_check_lookaround(idx, pos)?;
+
+        let unanchored =
+            self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)?;
+
+        let sub = self.c(lookaround.sub())?;
         let write = self.add_write_lookaround(idx)?;
+        self.patch(unanchored.end, sub.start)?;
         self.patch(sub.end, write)?;
-        self.patch(
-            self.lookaround_alt
-                .borrow()
-                .expect("Cannot compile look-around outside pattern"),
-            sub.start,
-        )?;
+        self.builder.borrow_mut().start_look_behind(unanchored.start);
         Ok(ThompsonRef { start: check, end: check })
     }
 
@@ -2169,13 +2142,12 @@ mod tests {
             &[
                 s_bin_union(2, 1),
                 s_range(0, 255, 0),
-                s_bin_union(3, 6),
+                s_check_lookaround(0, true, 7),
                 s_bin_union(5, 4),
                 s_range(0, 255, 3),
-                s_look(Look::Start, 7),
-                s_check_lookaround(0, true, 8),
+                s_look(Look::Start, 6),
                 s_write_lookaround(0),
-                s_byte(b'a', 9),
+                s_byte(b'a', 8),
                 s_match(0)
             ]
         );
@@ -2310,11 +2282,10 @@ mod tests {
         assert_eq!(
             build(r"(?<=a)").states(),
             &[
-                s_bin_union(1, 4),
+                s_check_lookaround(0, true, 5),
                 s_bin_union(3, 2),
                 s_range(b'\x00', b'\xFF', 1),
-                s_byte(b'a', 5),
-                s_check_lookaround(0, true, 6),
+                s_byte(b'a', 4),
                 s_write_lookaround(0),
                 s_match(0)
             ]
@@ -2322,16 +2293,16 @@ mod tests {
         assert_eq!(
             build(r"(?<=a(?<!b))").states(),
             &[
-                s_bin_union(1, 8),
+                s_check_lookaround(0, true, 10),
                 s_bin_union(3, 2),
                 s_range(b'\x00', b'\xFF', 1),
-                s_bin_union(5, 4),
-                s_byte(b'a', 6),
-                s_byte(b'b', 7),
-                s_check_lookaround(0, false, 9),
-                s_write_lookaround(0),
-                s_check_lookaround(1, true, 10),
+                s_byte(b'a', 4),
+                s_check_lookaround(1, false, 9),
+                s_bin_union(7, 6),
+                s_range(b'\x00', b'\xFF', 5),
+                s_byte(b'b', 8),
                 s_write_lookaround(1),
+                s_write_lookaround(0),
                 s_match(0)
             ]
         );
diff --git a/regex-automata/src/nfa/thompson/nfa.rs b/regex-automata/src/nfa/thompson/nfa.rs
index 2657540cb..1d63bd64a 100644
--- a/regex-automata/src/nfa/thompson/nfa.rs
+++ b/regex-automata/src/nfa/thompson/nfa.rs
@@ -1106,6 +1106,12 @@ impl NFA {
         self.0.lookaround_count
     }
 
+    /// Returns the start state of each look-behind sub-expression.
+    #[inline]
+    pub fn look_behind_starts(&self) -> &[StateID] {
+        &self.0.start_look_behind
+    }
+
     // FIXME: The `look_set_prefix_all` computation was not correct, and it
     // seemed a little tricky to fix it. Since I wasn't actually using it for
     // anything, I just decided to remove it in the run up to the regex 1.9
@@ -1270,6 +1276,8 @@ pub(super) struct Inner {
     /// This is needed to initialize the table for storing the result of
     /// look-around evaluation.
     lookaround_count: usize,
+    /// Contains the start states for each of the look-behind subexpressions.
+    start_look_behind: Vec<StateID>,
     /// Heap memory used indirectly by NFA states and other things (like the
     /// various capturing group representations above). Since each state
     /// might use a different amount of heap, we need to keep track of this
@@ -1419,6 +1427,13 @@ impl Inner {
         self.start_pattern = start_pattern.to_vec();
     }
 
+    pub(super) fn set_look_behind_starts(
+        &mut self,
+        look_behind_starts: &[StateID],
+    ) {
+        self.start_look_behind = Vec::from(look_behind_starts);
+    }
+
     /// Sets the UTF-8 mode of this NFA.
     pub(super) fn set_utf8(&mut self, yes: bool) {
         self.utf8 = yes;
@@ -1472,6 +1487,9 @@ impl Inner {
         for id in self.start_pattern.iter_mut() {
             *id = old_to_new[*id];
         }
+        for id in self.start_look_behind.iter_mut() {
+            *id = old_to_new[*id];
+        }
     }
 }
 
@@ -1483,6 +1501,8 @@ impl fmt::Debug for Inner {
                 '^'
             } else if sid == self.start_unanchored {
                 '>'
+            } else if self.start_look_behind.contains(&sid) {
+                '<'
             } else {
                 ' '
             };
diff --git a/regex-automata/src/nfa/thompson/pikevm.rs b/regex-automata/src/nfa/thompson/pikevm.rs
index eb40bf1a9..b3e6e45c9 100644
--- a/regex-automata/src/nfa/thompson/pikevm.rs
+++ b/regex-automata/src/nfa/thompson/pikevm.rs
@@ -1263,7 +1263,46 @@ impl PikeVM {
             ref mut curr,
             ref mut next,
             ref mut lookaround,
+            ref mut curr_lookaround,
+            ref mut next_lookaround,
         } = cache;
+
+        // This initializes the look-behind threads from the start of the
+        // input. Since capture groups are not allowed inside look-behinds,
+        // there are no Capture epsilon transitions, so an empty slots slice
+        // suffices. Start states are added in stored order: the compiler
+        // registers a nested look-behind before its enclosing one, so inner
+        // threads come first and their results are ready for the outer check.
+        for look_behind_start in self.nfa.look_behind_starts() {
+            self.epsilon_closure(
+                stack,
+                &mut [],
+                curr_lookaround,
+                lookaround,
+                input,
+                0,
+                *look_behind_start,
+            );
+        }
+
+        // This brings the look-behind threads into the state they must be for
+        // starting at input.start() instead of the beginning. This is
+        // necessary for look-behinds to be able to match outside of the input
+        // span.
+        for lb_at in 0..input.start() {
+            self.nexts(
+                stack,
+                curr_lookaround,
+                next_lookaround,
+                lookaround,
+                input,
+                lb_at,
+                &mut [],
+            );
+            core::mem::swap(curr_lookaround, next_lookaround);
+            next_lookaround.set.clear();
+        }
+
         let mut hm = None;
         // Yes, our search doesn't end at input.end(), but includes it. This
         // is necessary because matches are delayed by one byte, just like
@@ -1374,6 +1413,17 @@ impl PikeVM {
                     stack, slots, curr, lookaround, input, at, start_id,
                 );
             }
+            // The look-behind states must be processed first, since their
+            // result must be available for the processing of the main states.
+            self.nexts(
+                stack,
+                curr_lookaround,
+                next_lookaround,
+                lookaround,
+                input,
+                at,
+                &mut [],
+            );
             if let Some(pid) =
                 self.nexts(stack, curr, next, lookaround, input, at, slots)
             {
@@ -1387,7 +1437,9 @@ impl PikeVM {
                 break;
             }
             core::mem::swap(curr, next);
+            core::mem::swap(curr_lookaround, next_lookaround);
             next.set.clear();
+            next_lookaround.set.clear();
             at += 1;
         }
         instrument!(|c| c.eprint(&self.nfa));
@@ -1442,7 +1494,34 @@ impl PikeVM {
             ref mut curr,
             ref mut next,
             ref mut lookaround,
+            ref mut curr_lookaround,
+            ref mut next_lookaround,
         } = cache;
+
+        for look_behind_start in self.nfa.look_behind_starts() {
+            self.epsilon_closure(
+                stack,
+                &mut [],
+                curr_lookaround,
+                lookaround,
+                input,
+                0,
+                *look_behind_start,
+            );
+        }
+        for lb_at in 0..input.start() {
+            self.nexts(
+                stack,
+                curr_lookaround,
+                next_lookaround,
+                lookaround,
+                input,
+                lb_at,
+                &mut [],
+            );
+            core::mem::swap(curr_lookaround, next_lookaround);
+            next_lookaround.set.clear();
+        }
         for at in input.start()..=input.end() {
             let any_matches = !patset.is_empty();
             if curr.set.is_empty() {
@@ -1459,6 +1538,15 @@ impl PikeVM {
                     stack, slots, curr, lookaround, input, at, start_id,
                 );
             }
+            self.nexts(
+                stack,
+                curr_lookaround,
+                next_lookaround,
+                lookaround,
+                input,
+                at,
+                &mut [],
+            );
             self.nexts_overlapping(
                 stack, curr, next, lookaround, input, at, patset,
             );
@@ -1470,7 +1558,9 @@ impl PikeVM {
                 break;
             }
             core::mem::swap(curr, next);
+            core::mem::swap(curr_lookaround, next_lookaround);
             next.set.clear();
+            next_lookaround.set.clear();
         }
         instrument!(|c| c.eprint(&self.nfa));
     }
@@ -1976,6 +2066,10 @@ pub struct Cache {
     /// haystack at which look-around indexed x holds and which is <= to the
     /// current position".
     lookaround: Vec<Option<NonMaxUsize>>,
+    /// The current active states for look-behind subexpressions.
+    curr_lookaround: ActiveStates,
+    /// The next set of states to be explored for look-behind subexpressions.
+    next_lookaround: ActiveStates,
 }
 
 impl Cache {
@@ -1993,6 +2087,8 @@ impl Cache {
             curr: ActiveStates::new(re),
             next: ActiveStates::new(re),
             lookaround: vec![None; re.lookaround_count()],
+            curr_lookaround: ActiveStates::new(re),
+            next_lookaround: ActiveStates::new(re),
         }
     }
 
@@ -2036,6 +2132,9 @@ impl Cache {
     pub fn reset(&mut self, re: &PikeVM) {
         self.curr.reset(re);
         self.next.reset(re);
+        self.curr_lookaround.reset(re);
+        self.next_lookaround.reset(re);
+        self.lookaround = vec![None; re.lookaround_count()];
     }
 
     /// Returns the heap memory usage, in bytes, of this cache.
@@ -2047,6 +2146,8 @@ impl Cache {
         (self.stack.len() * size_of::<FollowEpsilon>())
             + self.curr.memory_usage()
             + self.next.memory_usage()
+            + self.curr_lookaround.memory_usage()
+            + self.next_lookaround.memory_usage()
     }
 
     /// Clears this cache. This should be called at the start of every search
@@ -2063,6 +2164,10 @@ impl Cache {
         self.stack.clear();
         self.curr.setup_search(captures_slot_len);
         self.next.setup_search(captures_slot_len);
+        // capture groups are not allowed inside look-arounds, so we
+        // set the slot-length to zero.
+        self.curr_lookaround.setup_search(0);
+        self.next_lookaround.setup_search(0);
     }
 }
 
diff --git a/testdata/lookaround.toml b/testdata/lookaround.toml
index 8818a8f1a..14a303d7c 100644
--- a/testdata/lookaround.toml
+++ b/testdata/lookaround.toml
@@ -46,6 +46,18 @@ regex = "(?<=c[def]+(?<!fed))a"
 haystack = "cdaceacfeeacfedeacfeda"
 matches = [[2, 3], [5, 6], [10, 11], [16, 17]]
 
+[[test]]
+name = "nested positive lookbehind"
+regex = "(?<=\\w{2}(?<=\\d))d"
+haystack = "ad 1d1ada1d1 d a1d"
+matches = [[10, 11], [17, 18]]
+
+[[test]]
+name = "overlapping lookbehind"
+regex = "(?<=abcab)c"
+haystack = "abcabcabcabc"
+matches = [[5,6], [8,9], [11,12]]
+
 [[test]]
 name = "lookbehind with alternation"
 regex = "(?<=def|abc)a"