From c2f5c7e31cb61a9b9bedc90c0568c1cad03ea2ca Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Wed, 22 Oct 2025 13:16:52 +0300
Subject: [PATCH 01/11] poc

---
 tests/unit/compiler/venom/test_stack_spill.py | 75 +++++++++++++++++
 vyper/utils.py                                |  1 +
 vyper/venom/venom_to_assembly.py              | 82 +++++++++++++++++--
 3 files changed, 153 insertions(+), 5 deletions(-)
 create mode 100644 tests/unit/compiler/venom/test_stack_spill.py

diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
new file mode 100644
index 0000000000..a258c1ed00
--- /dev/null
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import pytest
+
+from vyper.venom.basicblock import IRLiteral
+from vyper.venom.context import IRContext
+from vyper.venom.stack_model import StackModel
+from vyper.venom.venom_to_assembly import VenomCompiler
+
+
+@pytest.fixture
+def compiler() -> VenomCompiler:
+    ctx = IRContext()
+    ctx.add_constant("mem_deploy_end", 0)
+    return VenomCompiler(ctx)
+
+
+def _build_stack(count: int) -> tuple[StackModel, list[IRLiteral]]:
+    stack = StackModel()
+    ops = [IRLiteral(i) for i in range(count)]
+    for op in ops:
+        stack.push(op)
+    return stack, ops
+
+
+def _ops_only_strings(assembly) -> list[str]:
+    return [op for op in assembly if isinstance(op, str)]
+
+
+def test_swap_spills_deep_stack(compiler: VenomCompiler) -> None:
+    stack, ops = _build_stack(40)
+    assembly: list = []
+
+    target = ops[-18]
+    before = stack._stack.copy()
+
+    depth = stack.get_depth(target)
+    assert isinstance(depth, int) and depth < -16
+    swap_idx = -depth
+
+    compiler.swap(assembly, stack, depth)
+
+    expected = before.copy()
+    top_index = len(expected) - 1
+    target_index = expected.index(target)
+    expected[top_index], expected[target_index] = expected[target_index], expected[top_index]
+    assert stack._stack == expected
+
+    ops_str = _ops_only_strings(assembly)
+    assert ops_str.count("MSTORE") == swap_idx + 1
+    assert ops_str.count("MLOAD") == swap_idx + 1
+    assert all(int(op[4:]) <= 16 for op in ops_str if op.startswith("SWAP"))
+
+
+def test_dup_spills_deep_stack(compiler: VenomCompiler) -> None:
+    stack, ops = _build_stack(40)
+    assembly: list = []
+
+    target = ops[-18]
+    before = stack._stack.copy()
+
+    depth = stack.get_depth(target)
+    assert isinstance(depth, int) and depth < -16
+    dup_idx = 1 - depth
+
+    compiler.dup(assembly, stack, depth)
+
+    expected = before.copy()
+    expected.append(target)
+    assert stack._stack == expected
+
+    ops_str = _ops_only_strings(assembly)
+    assert ops_str.count("MSTORE") == dup_idx
+    assert ops_str.count("MLOAD") == dup_idx + 1
+    assert all(int(op[3:]) <= 16 for op in ops_str if op.startswith("DUP"))
diff --git a/vyper/utils.py b/vyper/utils.py
index 9b1084ab8f..579a095288 100644
--- a/vyper/utils.py
+++ b/vyper/utils.py
@@ -428,6 +428,7 @@ class MemoryPositions:
     FREE_VAR_SPACE = 0
     FREE_VAR_SPACE2 = 32
     RESERVED_MEMORY = 64
+    STACK_SPILL_BASE = 0x200  # scratch space used for spilling deep stacks
 
     # Sizes of different data types. Used to clamp types.
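
Note on the mechanism in the next file: to emulate SWAPn for n > 16, the top
n+1 stack items are stored to scratch memory and reloaded with the first and
last positions exchanged. A minimal model of the idea (plain Python standing
in for the EVM stack and memory; names are illustrative, not compiler APIs):

    SCRATCH_BASE = 0x200  # mirrors MemoryPositions.STACK_SPILL_BASE above

    def deep_swap(stack: list, depth: int, memory: dict) -> None:
        # depth is non-positive, as in the compiler's stack model
        n = -depth
        assert n > 16
        slots = []
        for i in range(n + 1):  # MSTORE loop: consumes the top n+1 items
            offset = SCRATCH_BASE + 32 * i
            memory[offset] = stack.pop()
            slots.append(offset)
        # slots[0] held the old top, slots[n] the old target (the deepest item)
        order = [slots[n]] + slots[1:n] + [slots[0]]
        for offset in reversed(order):  # MLOAD loop: pushes back in swapped order
            stack.append(memory[offset])

    stack = list(range(20))  # bottom ... top
    deep_swap(stack, -18, {})
    assert stack[-1] == 1 and stack[-19] == 19

The restore order matches desired_indices in the hunk below; each touched item
costs one PUSH+MSTORE on the way out and one PUSH+MLOAD on the way back, which
is the cost the helpers tally.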
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index a4a2de0666..dfe5e89dd5 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -628,18 +628,90 @@ def pop(self, assembly, stack, num=1):
         stack.pop(num)
         assembly.extend(["POP"] * num)
 
+    def _spill_stack_segment(
+        self, assembly, stack, count: int, base_offset: int
+    ) -> tuple[list[IROperand], list[int], int]:
+        spill_ops: list[IROperand] = []
+        offsets: list[int] = []
+        cost = 0
+
+        for i in range(count):
+            op = stack.peek(0)
+            spill_ops.append(op)
+
+            offset = base_offset + 32 * i
+            offsets.append(offset)
+
+            assembly.extend(PUSH(offset))
+            assembly.append("MSTORE")
+            stack.pop()
+            cost += 2
+
+        return spill_ops, offsets, cost
+
+    def _restore_spilled_segment(
+        self,
+        assembly,
+        stack,
+        spill_ops: list[IROperand],
+        offsets: list[int],
+        desired_indices: list[int],
+    ) -> int:
+        cost = 0
+
+        for idx in reversed(desired_indices):
+            assembly.extend(PUSH(offsets[idx]))
+            assembly.append("MLOAD")
+            stack.push(spill_ops[idx])
+            cost += 2
+
+        return cost
+
     def swap(self, assembly, stack, depth) -> int:
         # Swaps of the top is no op
         if depth == 0:
             return 0
-        stack.swap(depth)
-        assembly.append(_evm_swap_for(depth))
-        return 1
+        swap_idx = -depth
+        if swap_idx < 1:
+            raise StackTooDeep(f"Unsupported swap depth {swap_idx}")
+        if swap_idx <= 16:
+            stack.swap(depth)
+            assembly.append(_evm_swap_for(depth))
+            return 1
+
+        chunk_size = swap_idx + 1
+        spill_ops, offsets, cost = self._spill_stack_segment(
+            assembly, stack, chunk_size, MemoryPositions.STACK_SPILL_BASE
+        )
+
+        indices = list(range(chunk_size))
+        if chunk_size == 1:
+            desired_indices = indices
+        else:
+            desired_indices = [indices[-1]] + indices[1:-1] + [indices[0]]
+
+        cost += self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices)
+        return cost
 
     def dup(self, assembly, stack, depth):
-        stack.dup(depth)
-        assembly.append(_evm_dup_for(depth))
+        dup_idx = 1 - depth
+        if dup_idx < 1:
+            raise StackTooDeep(f"Unsupported dup depth {dup_idx}")
+        if dup_idx <= 16:
+            stack.dup(depth)
+            assembly.append(_evm_dup_for(depth))
+            return
+
+        chunk_size = dup_idx
+        spill_ops, offsets, _ = self._spill_stack_segment(
+            assembly, stack, chunk_size, MemoryPositions.STACK_SPILL_BASE
+        )
+
+        indices = list(range(chunk_size))
+        desired_indices = [indices[-1]] + indices
+
+        self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices)
 
     def swap_op(self, assembly, stack, op):
         depth = stack.get_depth(op)
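
The next patch ("wip") extends _stack_reorder with on-demand spilling: when
the wanted operand sits deeper than SWAP16 reach, a victim in the top 16 that
the reorder does not itself need is written out to a scratch slot, shrinking
the stack until the target is reachable. The selection loop reduces to this
sketch (plain Python; the real code additionally requires the victim to be a
spillable IRVariable):

    def select_victim(stack: list, wanted: set, target_depth: int):
        # Scan at most the top 16 slots, skipping anything still needed;
        # returns the offset from the top (0 = top) of a spillable entry, or None.
        max_offset = min(16, -target_depth - 1, len(stack) - 1)
        for offset in range(max_offset + 1):
            candidate = stack[-1 - offset]
            if candidate not in wanted:
                return offset
        return None

    stack = [f"%v{i}" for i in range(40)]
    assert select_victim(stack, {"%v39"}, -18) == 1  # top is wanted, next one isn't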
From 30f188f83c75fa81fd526722a39dc5294a2988e8 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Wed, 22 Oct 2025 14:08:08 +0300
Subject: [PATCH 02/11] wip

---
 tests/unit/compiler/venom/test_stack_spill.py |  44 ++++-
 vyper/venom/venom_to_assembly.py              | 170 ++++++++++++++++--
 2 files changed, 200 insertions(+), 14 deletions(-)

diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
index a258c1ed00..aa274ce054 100644
--- a/tests/unit/compiler/venom/test_stack_spill.py
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from vyper.venom.basicblock import IRLiteral
+from vyper.venom.basicblock import IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.stack_model import StackModel
 from vyper.venom.venom_to_assembly import VenomCompiler
@@ -27,6 +27,14 @@ def _ops_only_strings(assembly) -> list[str]:
     return [op for op in assembly if isinstance(op, str)]
 
 
+def _dummy_dfg():
+    class _DummyDFG:
+        def are_equivalent(self, a, b):
+            return False
+
+    return _DummyDFG()
+
+
 def test_swap_spills_deep_stack(compiler: VenomCompiler) -> None:
     stack, ops = _build_stack(40)
     assembly: list = []
@@ -73,3 +81,37 @@ def test_dup_spills_deep_stack(compiler: VenomCompiler) -> None:
     assert ops_str.count("MSTORE") == dup_idx
     assert ops_str.count("MLOAD") == dup_idx + 1
     assert all(int(op[3:]) <= 16 for op in ops_str if op.startswith("DUP"))
+
+
+def test_stack_reorder_spills_before_swap(compiler: VenomCompiler) -> None:
+    compiler.dfg = _dummy_dfg()
+    compiler._spill_next_slot = 0
+    compiler._spill_free_slots = []
+
+    stack = StackModel()
+    vars_on_stack = [IRVariable(f"%v{i}") for i in range(40)]
+    for var in vars_on_stack:
+        stack.push(var)
+
+    spilled: dict = {}
+    assembly: list = []
+
+    target = vars_on_stack[21]  # depth 18 from top for 40 items
+
+    compiler._stack_reorder(assembly, stack, [target], spilled, dry_run=False)
+
+    assert stack.get_depth(target) == 0
+    assert len(spilled) == 2  # spilled top two values to reduce depth to <= 16
+
+    ops_str = _ops_only_strings(assembly)
+    assert ops_str.count("MSTORE") == 2
+    assert all(int(op[4:]) <= 16 for op in ops_str if op.startswith("SWAP"))
+
+    # restoring a spilled variable should reload it via MLOAD
+    restore_assembly: list = []
+    spilled_var = next(iter(spilled))
+    compiler._restore_spilled_operand(restore_assembly, stack, spilled, spilled_var)
+    restore_ops = _ops_only_strings(restore_assembly)
+    assert restore_ops.count("MLOAD") == 1
+    assert spilled_var not in spilled
+    assert stack.get_depth(spilled_var) == 0
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index dfe5e89dd5..356cc292b6 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -153,6 +153,8 @@ def __init__(self, ctx: IRContext):
         self.ctx = ctx
         self.label_counter = 0
         self.visited_basicblocks = OrderedSet()
+        self._spill_next_slot = 0
+        self._spill_free_slots: list[int] = []
 
     def mklabel(self, name: str) -> Label:
         self.label_counter += 1
@@ -173,7 +175,10 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr
 
         assert self.cfg.is_normalized(), "Non-normalized CFG!"
 
-        self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel())
+        self._spill_next_slot = 0
+        self._spill_free_slots: list[int] = []
+
+        self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel(), {})
 
         asm.extend(_REVERT_POSTAMBLE)
 
     # Append data segment
@@ -197,11 +202,22 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr
         return asm
 
     def _stack_reorder(
-        self, assembly: list, stack: StackModel, stack_ops: list[IROperand], dry_run: bool = False
+        self,
+        assembly: list,
+        stack: StackModel,
+        stack_ops: list[IROperand],
+        spilled: dict[IROperand, int],
+        dry_run: bool = False,
     ) -> int:
         if dry_run:
             assert len(assembly) == 0, "Dry run should not work on assembly"
             stack = stack.copy()
+            spilled = spilled.copy()
+            spill_free_snapshot = self._spill_free_slots.copy()
+            spill_next_snapshot = self._spill_next_slot
+        else:
+            spill_free_snapshot = []
+            spill_next_snapshot = 0
 
         if len(stack_ops) == 0:
             return 0
@@ -213,10 +229,26 @@
         cost = 0
         for i, op in enumerate(stack_ops):
             final_stack_depth = -(len(stack_ops) - i - 1)
+
+            if isinstance(op, IRVariable) and op in spilled:
+                self._restore_spilled_operand(assembly, stack, spilled, op, dry_run=dry_run)
+                depth = stack.get_depth(op)
 
             depth = stack.get_depth(op)
             if depth == StackModel.NOT_IN_STACK:
-                raise CompilerPanic(f"Variable {op} not in stack")
+                if isinstance(op, IRVariable) and op in spilled:
+                    self._restore_spilled_operand(assembly, stack, spilled, op, dry_run=dry_run)
+                    depth = stack.get_depth(op)
+                else:
+                    raise CompilerPanic(f"Variable {op} not in stack")
+
+            if depth < -16:
+                self._reduce_depth_via_spill(
+                    assembly, stack, spilled, stack_ops, op, depth, dry_run
+                )
+                depth = stack.get_depth(op)
 
             if depth == final_stack_depth:
                 continue
@@ -233,8 +265,110 @@
 
         assert stack._stack[-len(stack_ops) :] == stack_ops, (stack, stack_ops)
 
+        if dry_run:
+            self._spill_free_slots = spill_free_snapshot
+            self._spill_next_slot = spill_next_snapshot
+
         return cost
 
+    def _reduce_depth_via_spill(
+        self,
+        assembly: list,
+        stack: StackModel,
+        spilled: dict[IROperand, int],
+        stack_ops: list[IROperand],
+        target_op: IROperand,
+        depth: int,
+        dry_run: bool,
+    ) -> bool:
+        while depth < -16:
+            candidate_depth = self._select_spill_candidate(stack, stack_ops, depth)
+            if candidate_depth is None:
+                return False
+            self._spill_operand(assembly, stack, spilled, candidate_depth, dry_run)
+            depth = stack.get_depth(target_op)
+            if depth == StackModel.NOT_IN_STACK:
+                if isinstance(target_op, IRVariable) and target_op in spilled:
+                    self._restore_spilled_operand(assembly, stack, spilled, target_op, dry_run)
+                    depth = stack.get_depth(target_op)
+                else:
+                    return False
+        return True
+
+    def _select_spill_candidate(
+        self, stack: StackModel, stack_ops: list[IROperand], target_depth: int
+    ) -> int | None:
+        forbidden = set(stack_ops)
+        max_offset = min(16, -target_depth - 1, stack.height - 1)
+        if max_offset < 0:
+            return None
+        for offset in range(0, max_offset + 1):
+            depth = -offset
+            candidate = stack.peek(depth)
+            if candidate in forbidden:
+                continue
+            if not isinstance(candidate, IRVariable):
+                continue
+            return depth
+        return None
+
+    def _spill_operand(
+        self,
+        assembly: list,
+        stack: StackModel,
+        spilled: dict[IROperand, int],
+        depth: int,
+        dry_run: bool,
+    ) -> None:
+        operand = stack.peek(depth)
+        assert isinstance(operand, IRVariable), operand
+
+        if depth != 0:
+            self.swap(assembly, stack, depth)
+
+        offset = self._get_spill_slot(operand, spilled, dry_run)
+        assembly.extend(PUSH(offset))
+        assembly.append("MSTORE")
+        stack.pop()
+        spilled[operand] = offset
+
+    def _restore_spilled_operand(
+        self,
+        assembly: list,
+        stack: StackModel,
+        spilled: dict[IROperand, int],
+        op: IRVariable,
+        dry_run: bool = False,
+    ) -> None:
+        offset = spilled.pop(op)
+        if not dry_run:
+            self._spill_free_slots.append(offset)
+        assembly.extend(PUSH(offset))
+        assembly.append("MLOAD")
+        stack.push(op)
+
+    def _get_spill_slot(
+        self, operand: IRVariable, spilled: dict[IROperand, int], dry_run: bool
+    ) -> int:
+        if operand in spilled:
+            return spilled[operand]
+        if dry_run:
+            return MemoryPositions.STACK_SPILL_BASE
+        if self._spill_free_slots:
+            return self._spill_free_slots.pop()
+        offset = MemoryPositions.STACK_SPILL_BASE + 32 * self._spill_next_slot
+        self._spill_next_slot += 1
+        return offset
+
+    def _release_dead_spills(
+        self, spilled: dict[IROperand, int], live_set: OrderedSet[IRVariable]
+    ) -> None:
+        for op in list(spilled.keys()):
+            if isinstance(op, IRVariable) and op in live_set:
+                continue
+            offset = spilled.pop(op)
+            self._spill_free_slots.append(offset)
+
     def _emit_input_operands(
         self,
         assembly: list,
@@ -242,6 +376,7 @@ def _emit_input_operands(
         ops: list[IROperand],
         stack: StackModel,
         next_liveness: OrderedSet[IRVariable],
+        spilled: dict[IROperand, int],
     ) -> None:
         # PRE: we already have all the items on the stack that have
         # been scheduled to be killed. now it's just a matter of emitting
@@ -252,6 +387,9 @@ def _emit_input_operands(
         seen: set[IROperand] = set()
 
         for op in ops:
+            if isinstance(op, IRVariable) and op in spilled:
+                self._restore_spilled_operand(assembly, stack, spilled, op)
+
             if isinstance(op, IRLabel):
                 # invoke emits the actual instruction itself so we don't need
                 # to emit it here but we need to add it to the stack map
@@ -318,7 +456,7 @@ def popmany(self, asm, to_pop: Iterable[IRVariable], stack):
             self.pop(asm, stack)
 
     def _generate_evm_for_basicblock_r(
-        self, asm: list, basicblock: IRBasicBlock, stack: StackModel
+        self, asm: list, basicblock: IRBasicBlock, stack: StackModel, spilled: dict[IROperand, int]
    ) -> None:
         if basicblock in self.visited_basicblocks:
             return
@@ -348,7 +486,7 @@ def _generate_evm_for_basicblock_r(
             else:
                 next_liveness = self.liveness.out_vars(basicblock)
 
-            asm.extend(self._generate_evm_for_instruction(inst, stack, next_liveness))
+            asm.extend(self._generate_evm_for_instruction(inst, stack, next_liveness, spilled))
 
         if DEBUG_SHOW_COST:
             print(" ".join(map(str, asm)), file=sys.stderr)
@@ -357,7 +495,7 @@ def _generate_evm_for_basicblock_r(
         ref.extend(asm)
 
         for bb in self.cfg.cfg_out(basicblock):
-            self._generate_evm_for_basicblock_r(ref, bb, stack.copy())
+            self._generate_evm_for_basicblock_r(ref, bb, stack.copy(), spilled.copy())
 
     # pop values from stack at entry to bb
     # note this produces the same result(!) no matter which basic block
@@ -382,7 +520,11 @@ def clean_stack_from_cfg_in(
         self.popmany(asm, to_pop, stack)
 
     def _generate_evm_for_instruction(
-        self, inst: IRInstruction, stack: StackModel, next_liveness: OrderedSet
+        self,
+        inst: IRInstruction,
+        stack: StackModel,
+        next_liveness: OrderedSet,
+        spilled: dict[IROperand, int],
     ) -> list[str]:
         assembly: list[AssemblyInstruction] = []
         opcode = inst.opcode
@@ -449,7 +591,7 @@ def _generate_evm_for_instruction(
             return apply_line_numbers(inst, assembly)
 
         # Step 2: Emit instruction's input operands
-        self._emit_input_operands(assembly, inst, operands, stack, next_liveness)
+        self._emit_input_operands(assembly, inst, operands, stack, next_liveness, spilled)
 
         # Step 3: Reorder stack before join points
         if opcode == "jmp":
@@ -464,16 +606,16 @@ def _generate_evm_for_instruction(
             assert len(self.cfg.cfg_in(next_bb)) > 1
 
             target_stack = self.liveness.input_vars_from(inst.parent, next_bb)
-            self._stack_reorder(assembly, stack, list(target_stack))
+            self._stack_reorder(assembly, stack, list(target_stack), spilled)
 
         if inst.is_commutative:
-            cost_no_swap = self._stack_reorder([], stack, operands, dry_run=True)
+            cost_no_swap = self._stack_reorder([], stack, operands, spilled, dry_run=True)
             operands[-1], operands[-2] = operands[-2], operands[-1]
-            cost_with_swap = self._stack_reorder([], stack, operands, dry_run=True)
+            cost_with_swap = self._stack_reorder([], stack, operands, spilled, dry_run=True)
             if cost_with_swap > cost_no_swap:
                 operands[-1], operands[-2] = operands[-2], operands[-1]
 
-        cost = self._stack_reorder([], stack, operands, dry_run=True)
+        cost = self._stack_reorder([], stack, operands, spilled, dry_run=True)
         if DEBUG_SHOW_COST and cost:
             print("ENTER", inst, file=sys.stderr)
             print("  HAVE", stack, file=sys.stderr)
@@ -482,7 +624,7 @@ def _generate_evm_for_instruction(
 
         # final step to get the inputs to this instruction ordered
         # correctly on the stack
-        self._stack_reorder(assembly, stack, operands)
+        self._stack_reorder(assembly, stack, operands, spilled)
 
         # some instructions (i.e. invoke) need to do stack manipulations
         # with the stack model containing the return value(s), so we fiddle
@@ -596,6 +738,8 @@ def _generate_evm_for_instruction(
         else:
             self._optimistic_swap(assembly, inst, next_liveness, stack)
 
+        self._release_dead_spills(spilled, next_liveness)
+
         return apply_line_numbers(inst, assembly)
 
     def _optimistic_swap(self, assembly, inst, next_liveness, stack):
From 698ae7e0a4f74da60e59e5c1afcb3c560fe7e2c9 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Wed, 22 Oct 2025 19:14:15 +0300
Subject: [PATCH 03/11] inter test

---
 tests/unit/compiler/venom/test_stack_spill.py | 112 ++++++++++++++++--
 1 file changed, 100 insertions(+), 12 deletions(-)

diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
index aa274ce054..5d1ad158b5 100644
--- a/tests/unit/compiler/venom/test_stack_spill.py
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -1,18 +1,11 @@
-from __future__ import annotations
-
 import pytest
 
 from vyper.venom.basicblock import IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.stack_model import StackModel
 from vyper.venom.venom_to_assembly import VenomCompiler
-
-
-@pytest.fixture
-def compiler() -> VenomCompiler:
-    ctx = IRContext()
-    ctx.add_constant("mem_deploy_end", 0)
-    return VenomCompiler(ctx)
+from vyper.venom.parser import parse_venom
+from vyper.ir.compile_ir import Label
 
 
 def _build_stack(count: int) -> tuple[StackModel, list[IRLiteral]]:
@@ -35,7 +28,8 @@ def are_equivalent(self, a, b):
     return _DummyDFG()
 
 
-def test_swap_spills_deep_stack(compiler: VenomCompiler) -> None:
+def test_swap_spills_deep_stack() -> None:
+    compiler = VenomCompiler(IRContext())
     stack, ops = _build_stack(40)
     assembly: list = []
 
@@ -60,7 +54,8 @@ def test_swap_spills_deep_stack(compiler: VenomCompiler) -> None:
     assert all(int(op[4:]) <= 16 for op in ops_str if op.startswith("SWAP"))
 
 
-def test_dup_spills_deep_stack(compiler: VenomCompiler) -> None:
+def test_dup_spills_deep_stack() -> None:
+    compiler = VenomCompiler(IRContext())
     stack, ops = _build_stack(40)
     assembly: list = []
 
@@ -83,7 +78,8 @@ def test_dup_spills_deep_stack(compiler: VenomCompiler) -> None:
     assert all(int(op[3:]) <= 16 for op in ops_str if op.startswith("DUP"))
 
 
-def test_stack_reorder_spills_before_swap(compiler: VenomCompiler) -> None:
+def test_stack_reorder_spills_before_swap() -> None:
+    compiler = VenomCompiler(IRContext())
     compiler.dfg = _dummy_dfg()
     compiler._spill_next_slot = 0
     compiler._spill_free_slots = []
@@ -115,3 +111,95 @@ def test_stack_reorder_spills_before_swap(compiler: VenomCompiler) -> None:
     assert restore_ops.count("MLOAD") == 1
     assert spilled_var not in spilled
     assert stack.get_depth(spilled_var) == 0
+
+
+def test_branch_spill_integration() -> None:
+    venom_src = """
+    function spill_demo {
+    main:
+        %v0 = mload 0
+        %v1 = mload 32
+        %v2 = mload 64
+        %v3 = mload 96
+        %v4 = mload 128
+        %v5 = mload 160
+        %v6 = mload 192
+        %v7 = mload 224
+        %v8 = mload 256
+        %v9 = mload 288
+        %v10 = mload 320
+        %v11 = mload 352
+        %v12 = mload 384
+        %v13 = mload 416
+        %v14 = mload 448
+        %v15 = mload 480
+        %v16 = mload 512
+        %v17 = mload 544
+        %v18 = mload 576
+        %v19 = mload 608
+        %cond = mload 640
+        jnz %cond, @then, @else
+    then:
+        %then_sum = add %v0, %v19
+        %res_then = add %then_sum, %cond
+        jmp @join
+    else:
+        %else_sum = add %v1, %v19
+        %res_else = add %else_sum, %cond
+        jmp @join
+    join:
+        %phi = phi @then, %res_then, @else, %res_else
+        %acc1 = add %phi, %v1
+        %acc2 = add %acc1, %v2
+        %acc3 = add %acc2, %v3
+        %acc4 = add %acc3, %v4
+        %acc5 = add %acc4, %v5
+        %acc6 = add %acc5, %v6
+        %acc7 = add %acc6, %v7
+        %acc8 = add %acc7, %v8
+        %acc9 = add %acc8, %v9
+        %acc10 = add %acc9, %v10
+        %acc11 = add %acc10, %v11
+        %acc12 = add %acc11, %v12
+        %acc13 = add %acc12, %v13
+        %acc14 = add %acc13, %v14
+        %acc15 = add %acc14, %v15
+        %acc16 = add %acc15, %v16
+        %acc17 = add %acc16, %v17
+        %acc18 = add %acc17, %v18
+        return %acc18
+    }
+    """
+
+    ctx = parse_venom(venom_src)
+
+    asm = VenomCompiler(ctx).generate_evm_assembly()
+    opcodes = [op for op in asm if isinstance(op, str)]
+
+    for op in opcodes:
+        if op.startswith("SWAP"):
+            assert int(op[4:]) <= 16
+        if op.startswith("DUP"):
+            assert int(op[3:]) <= 16
+
+    def _count_spill(kind: str) -> list[int]:
+        seq = ["PUSH2", 2, 0, kind]
+        return [
+            idx
+            for idx in range(len(asm) - len(seq) + 1)
+            if asm[idx : idx + len(seq)] == seq
+        ]
+
+    store_indices = _count_spill("MSTORE")
+    load_indices = _count_spill("MLOAD")
+    assert store_indices
+    assert load_indices
+
+    join_idx = next(
+        idx for idx, op in enumerate(asm) if isinstance(op, Label) and str(op) == "LABEL join"
+    )
+
+    assert any(idx < join_idx for idx in store_indices)
+    assert any(idx > join_idx for idx in store_indices)
+    assert any(idx < join_idx for idx in load_indices)
+    assert any(idx > join_idx for idx in load_indices)
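
A note on the magic sequence in _count_spill above: assembled PUSH immediates
appear byte-wise in the assembly list, so a store to the scratch base 0x200
shows up as ["PUSH2", 2, 0, "MSTORE"]. A hedged sketch of that encoding (an
illustrative helper, not the assembler's actual PUSH):

    def push_immediate(value: int) -> list:
        # big-endian byte-wise encoding: one PUSHn marker plus n immediate bytes
        nbytes = max(1, (value.bit_length() + 7) // 8)
        return [f"PUSH{nbytes}"] + list(value.to_bytes(nbytes, "big"))

    assert push_immediate(0x200) == ["PUSH2", 2, 0]

The next patch replaces this brittle literal match with _find_spill_ops, which
walks the PUSHn widths instead of assuming a fixed scratch offset.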
From 1fc98c8f28642856a654657c3f2d5498959cb49e Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Mon, 27 Oct 2025 14:03:42 +0200
Subject: [PATCH 04/11] gg

---
 tests/unit/compiler/venom/test_stack_spill.py |  39 ++++---
 vyper/venom/venom_to_assembly.py              | 102 +++++++++++-----
 2 files changed, 100 insertions(+), 41 deletions(-)

diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
index 5d1ad158b5..1d1e85e491 100644
--- a/tests/unit/compiler/venom/test_stack_spill.py
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -1,5 +1,3 @@
-import pytest
-
 from vyper.venom.basicblock import IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.stack_model import StackModel
@@ -172,8 +170,13 @@ def test_branch_spill_integration() -> None:
     """
 
     ctx = parse_venom(venom_src)
+    compiler = VenomCompiler(ctx)
+    compiler.generate_evm_assembly()
+
+    fn = next(iter(ctx.functions.values()))
+    assert any(inst.opcode == "alloca" for inst in fn.entry.instructions)
 
-    asm = VenomCompiler(ctx).generate_evm_assembly()
+    asm = compiler.generate_evm_assembly()
     opcodes = [op for op in asm if isinstance(op, str)]
 
     for op in opcodes:
@@ -182,16 +185,26 @@ def test_branch_spill_integration() -> None:
         if op.startswith("DUP"):
             assert int(op[3:]) <= 16
 
-    def _count_spill(kind: str) -> list[int]:
-        seq = ["PUSH2", 2, 0, kind]
-        return [
-            idx
-            for idx in range(len(asm) - len(seq) + 1)
-            if asm[idx : idx + len(seq)] == seq
-        ]
-
-    store_indices = _count_spill("MSTORE")
-    load_indices = _count_spill("MLOAD")
+    def _find_spill_ops(kind: str) -> list[int]:
+        matches: list[int] = []
+        idx = 0
+        while idx < len(asm):
+            item = asm[idx]
+            if isinstance(item, str) and item.startswith("PUSH"):
+                try:
+                    push_bytes = int(item[4:])
+                except ValueError:
+                    push_bytes = 0
+                target_idx = idx + 1 + push_bytes
+                if target_idx < len(asm) and asm[target_idx] == kind:
+                    matches.append(idx)
+                idx = target_idx + 1
+            else:
+                idx += 1
+        return matches
+
+    store_indices = _find_spill_ops("MSTORE")
+    load_indices = _find_spill_ops("MLOAD")
     assert store_indices
     assert load_indices
 
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 356cc292b6..9267f4be80 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -153,8 +153,12 @@ def __init__(self, ctx: IRContext):
         self.ctx = ctx
         self.label_counter = 0
         self.visited_basicblocks = OrderedSet()
-        self._spill_next_slot = 0
         self._spill_free_slots: list[int] = []
+        self._spill_slot_offsets: dict[IRFunction, list[int]] = {}
+        self._spill_insert_index: dict[IRFunction, int] = {}
+        self._next_spill_offset = 0
+        self._next_spill_alloca_id = 0
+        self._current_function: IRFunction | None = None
 
     def mklabel(self, name: str) -> Label:
         self.label_counter += 1
@@ -175,10 +179,12 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr
 
         assert self.cfg.is_normalized(), "Non-normalized CFG!"
 
-        self._spill_next_slot = 0
-        self._spill_free_slots: list[int] = []
+        self._current_function = fn
+        self._prepare_spill_state(fn)
+        self._spill_free_slots = []
 
         self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel(), {})
+        self._current_function = None
 
         asm.extend(_REVERT_POSTAMBLE)
 
     # Append data segment
@@ -214,10 +220,8 @@ def _stack_reorder(
             stack = stack.copy()
             spilled = spilled.copy()
             spill_free_snapshot = self._spill_free_slots.copy()
-            spill_next_snapshot = self._spill_next_slot
         else:
             spill_free_snapshot = []
-            spill_next_snapshot = 0
 
         if len(stack_ops) == 0:
             return 0
@@ -260,14 +264,13 @@ def _stack_reorder(
                 stack.poke(depth, to_swap)
                 continue
 
-            cost += self.swap(assembly, stack, depth)
-            cost += self.swap(assembly, stack, final_stack_depth)
+            cost += self.swap(assembly, stack, depth, dry_run)
+            cost += self.swap(assembly, stack, final_stack_depth, dry_run)
 
         assert stack._stack[-len(stack_ops) :] == stack_ops, (stack, stack_ops)
 
         if dry_run:
             self._spill_free_slots = spill_free_snapshot
-            self._spill_next_slot = spill_next_snapshot
 
         return cost
 
@@ -324,7 +327,7 @@ def _spill_operand(
         assert isinstance(operand, IRVariable), operand
 
         if depth != 0:
-            self.swap(assembly, stack, depth)
+            self.swap(assembly, stack, depth, dry_run)
 
         offset = self._get_spill_slot(operand, spilled, dry_run)
         assembly.extend(PUSH(offset))
@@ -352,12 +355,7 @@ def _get_spill_slot(
     ) -> int:
         if operand in spilled:
             return spilled[operand]
-        if dry_run:
-            return MemoryPositions.STACK_SPILL_BASE
-        if self._spill_free_slots:
-            return self._spill_free_slots.pop()
-        offset = MemoryPositions.STACK_SPILL_BASE + 32 * self._spill_next_slot
-        self._spill_next_slot += 1
+        offset = self._acquire_spill_offset(dry_run)
         return offset
 
     def _release_dead_spills(
@@ -441,6 +439,51 @@ def _prepare_stack_for_function(self, asm, fn: IRFunction, stack: StackModel):
 
         self._optimistic_swap(asm, last_param, next_liveness, stack)
 
+    def _prepare_spill_state(self, fn: IRFunction) -> None:
+        if fn in self._spill_slot_offsets:
+            return
+
+        entry = fn.entry
+        insert_idx = 0
+        for inst in entry.instructions:
+            if inst.opcode == "param":
+                insert_idx += 1
+            else:
+                break
+
+        self._spill_slot_offsets[fn] = []
+        self._spill_insert_index[fn] = insert_idx
+
+    def _allocate_spill_slot(self, fn: IRFunction) -> int:
+        entry = fn.entry
+        insert_idx = self._spill_insert_index[fn]
+
+        offset = self._next_spill_offset
+        self._next_spill_offset += 32
+
+        offset_lit = IRLiteral(offset)
+        size_lit = IRLiteral(32)
+        id_lit = IRLiteral(self._next_spill_alloca_id)
+        self._next_spill_alloca_id += 1
+
+        output_var = fn.get_next_variable()
+        inst = IRInstruction("alloca", [offset_lit, size_lit, id_lit], output_var)
+        entry.insert_instruction(inst, insert_idx)
+        self._spill_insert_index[fn] += 1
+
+        self._spill_slot_offsets[fn].append(offset)
+        return offset
+
+    def _acquire_spill_offset(self, dry_run: bool) -> int:
+        if self._spill_free_slots:
+            return self._spill_free_slots.pop()
+        if dry_run:
+            return 0
+        if self._current_function is None:
+            offset = self._next_spill_offset
+            self._next_spill_offset += 32
+            return offset
+        return self._allocate_spill_slot(self._current_function)
+
     def popmany(self, asm, to_pop: Iterable[IRVariable], stack):
         to_pop = list(to_pop)
         # small heuristic: pop from shallowest first.
@@ -773,17 +816,17 @@ def pop(self, assembly, stack, num=1):
         assembly.extend(["POP"] * num)
 
     def _spill_stack_segment(
-        self, assembly, stack, count: int, base_offset: int
+        self, assembly, stack, count: int, dry_run: bool
     ) -> tuple[list[IROperand], list[int], int]:
         spill_ops: list[IROperand] = []
         offsets: list[int] = []
         cost = 0
 
-        for i in range(count):
+        for _ in range(count):
             op = stack.peek(0)
             spill_ops.append(op)
 
-            offset = base_offset + 32 * i
+            offset = self._acquire_spill_offset(dry_run)
             offsets.append(offset)
 
             assembly.extend(PUSH(offset))
@@ -800,6 +843,7 @@ def _restore_spilled_segment(
         spill_ops: list[IROperand],
         offsets: list[int],
         desired_indices: list[int],
+        dry_run: bool,
     ) -> int:
         cost = 0
 
@@ -809,9 +853,13 @@ def _restore_spilled_segment(
             stack.push(spill_ops[idx])
             cost += 2
 
+        if not dry_run:
+            for offset in offsets:
+                self._spill_free_slots.append(offset)
+
         return cost
 
-    def swap(self, assembly, stack, depth) -> int:
+    def swap(self, assembly, stack, depth, dry_run: bool = False) -> int:
         # Swaps of the top is no op
         if depth == 0:
             return 0
@@ -825,9 +873,7 @@ def swap(self, assembly, stack, depth) -> int:
             return 1
 
         chunk_size = swap_idx + 1
-        spill_ops, offsets, cost = self._spill_stack_segment(
-            assembly, stack, chunk_size, MemoryPositions.STACK_SPILL_BASE
-        )
+        spill_ops, offsets, cost = self._spill_stack_segment(assembly, stack, chunk_size, dry_run)
 
         indices = list(range(chunk_size))
         if chunk_size == 1:
@@ -835,10 +881,12 @@ def swap(self, assembly, stack, depth) -> int:
         else:
             desired_indices = [indices[-1]] + indices[1:-1] + [indices[0]]
 
-        cost += self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices)
+        cost += self._restore_spilled_segment(
+            assembly, stack, spill_ops, offsets, desired_indices, dry_run
+        )
         return cost
 
-    def dup(self, assembly, stack, depth):
+    def dup(self, assembly, stack, depth, dry_run: bool = False):
         dup_idx = 1 - depth
         if dup_idx < 1:
             raise StackTooDeep(f"Unsupported dup depth {dup_idx}")
@@ -848,14 +896,12 @@ def dup(self, assembly, stack, depth):
             return
 
         chunk_size = dup_idx
-        spill_ops, offsets, _ = self._spill_stack_segment(
-            assembly, stack, chunk_size, MemoryPositions.STACK_SPILL_BASE
-        )
+        spill_ops, offsets, _ = self._spill_stack_segment(assembly, stack, chunk_size, dry_run)
 
         indices = list(range(chunk_size))
         desired_indices = [indices[-1]] + indices
 
-        self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices)
+        self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices, dry_run)
 
     def swap_op(self, assembly, stack, op):
         depth = stack.get_depth(op)
From 85cffb45364aff7949dfd184cf9c9ce15bdac167 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 6 Nov 2025 19:06:36 +0200
Subject: [PATCH 05/11] fixes

---
 tests/functional/codegen/types/test_lists.py  | 3 +--
 tests/unit/compiler/venom/test_stack_spill.py | 4 ++--
 vyper/utils.py                                | 2 +-
 vyper/venom/venom_to_assembly.py              | 9 ++++-----
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/tests/functional/codegen/types/test_lists.py b/tests/functional/codegen/types/test_lists.py
index a981987ce6..26cd16ed32 100644
--- a/tests/functional/codegen/types/test_lists.py
+++ b/tests/functional/codegen/types/test_lists.py
@@ -7,7 +7,7 @@
 from tests.utils import check_precompile_asserts, decimal_to_int
 from vyper.compiler.settings import OptimizationLevel
 from vyper.evm.opcodes import version_check
-from vyper.exceptions import ArrayIndexException, OverflowException, StackTooDeep, TypeMismatch
+from vyper.exceptions import ArrayIndexException, OverflowException, TypeMismatch
 
 
 def _map_nested(f, xs):
@@ -597,7 +597,6 @@ def bar(_baz: Foo[3]) -> String[96]:
     assert c.bar(c_input) == "Hello world!!!!"
 
 
-@pytest.mark.venom_xfail(raises=StackTooDeep, reason="stack scheduler regression")
 def test_list_of_nested_struct_arrays(get_contract):
     code = """
 struct Ded:
diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
index 1d1e85e491..c653b28eb2 100644
--- a/tests/unit/compiler/venom/test_stack_spill.py
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -1,9 +1,9 @@
+from vyper.ir.compile_ir import Label
 from vyper.venom.basicblock import IRLiteral, IRVariable
 from vyper.venom.context import IRContext
+from vyper.venom.parser import parse_venom
 from vyper.venom.stack_model import StackModel
 from vyper.venom.venom_to_assembly import VenomCompiler
-from vyper.venom.parser import parse_venom
-from vyper.ir.compile_ir import Label
 
 
 def _build_stack(count: int) -> tuple[StackModel, list[IRLiteral]]:
diff --git a/vyper/utils.py b/vyper/utils.py
index 579a095288..3c37d39ee9 100644
--- a/vyper/utils.py
+++ b/vyper/utils.py
@@ -428,7 +428,7 @@ class MemoryPositions:
     FREE_VAR_SPACE = 0
     FREE_VAR_SPACE2 = 32
     RESERVED_MEMORY = 64
-    STACK_SPILL_BASE = 0x200  # scratch space used for spilling deep stacks
+    STACK_SPILL_BASE = 0x10000  # scratch space used for spilling deep stacks
 
     # Sizes of different data types. Used to clamp types.
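
The base move to 0x10000 above presumably keeps spill slots clear of memory
the frontend already allocates; slots themselves are recycled through the
free list introduced in PATCH 04. That allocator reduces to this model (a
sketch assuming single 32-byte word slots; names are illustrative):

    FREE: list[int] = []
    NEXT = 0x10000  # the new STACK_SPILL_BASE

    def acquire() -> int:
        # reuse a released slot when possible; otherwise grow by one word
        global NEXT
        if FREE:
            return FREE.pop()
        offset = NEXT
        NEXT += 32
        return offset

    def release(offset: int) -> None:
        FREE.append(offset)

    a, b = acquire(), acquire()   # 0x10000, 0x10020
    release(a)
    assert acquire() == a         # a freed slot is reused before growing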
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 9267f4be80..0109e689b3 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -156,7 +156,7 @@ def __init__(self, ctx: IRContext):
         self._spill_free_slots: list[int] = []
         self._spill_slot_offsets: dict[IRFunction, list[int]] = {}
         self._spill_insert_index: dict[IRFunction, int] = {}
-        self._next_spill_offset = 0
+        self._next_spill_offset = MemoryPositions.STACK_SPILL_BASE
         self._next_spill_alloca_id = 0
         self._current_function: IRFunction | None = None
 
@@ -234,9 +234,6 @@ def _stack_reorder(
         for i, op in enumerate(stack_ops):
             final_stack_depth = -(len(stack_ops) - i - 1)
 
-            if isinstance(op, IRVariable) and op in spilled:
-                self._restore_spilled_operand(assembly, stack, spilled, op, dry_run=dry_run)
-                depth = stack.get_depth(op)
 
             depth = stack.get_depth(op)
             if depth == StackModel.NOT_IN_STACK:
@@ -803,7 +800,9 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack):
         next_scheduled = next_liveness.last()
         cost = 0
         if not self.dfg.are_equivalent(inst.output, next_scheduled):
-            cost = self.swap_op(assembly, stack, next_scheduled)
+            depth = stack.get_depth(next_scheduled)
+            if depth is not StackModel.NOT_IN_STACK:
+                cost = self.swap(assembly, stack, depth)
 
         if DEBUG_SHOW_COST and cost != 0:
             print("ENTER", inst, file=sys.stderr)
From 018245d53c9db759d37e041746a7db439470d50e Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 6 Nov 2025 19:09:36 +0200
Subject: [PATCH 06/11] remove xfails

---
 tests/functional/codegen/features/test_constructor.py | 2 --
 tests/functional/codegen/features/test_immutable.py   | 2 --
 tests/functional/codegen/features/test_transient.py   | 3 +--
 3 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/functional/codegen/features/test_constructor.py b/tests/functional/codegen/features/test_constructor.py
index 182e2e2ff2..3a10680690 100644
--- a/tests/functional/codegen/features/test_constructor.py
+++ b/tests/functional/codegen/features/test_constructor.py
@@ -3,7 +3,6 @@
 import pytest
 
 from tests.evm_backends.base_env import _compile
-from vyper.exceptions import StackTooDeep
 from vyper.utils import method_id
 
 
@@ -216,7 +215,6 @@ def get_foo() -> DynArray[DynArray[uint256, 3], 3]:
     assert c.get_foo() == [[37, 41, 73], [37041, 41073, 73037], [146, 123, 148]]
 
 
-@pytest.mark.venom_xfail(raises=StackTooDeep, reason="stack scheduler regression")
 def test_initialise_nested_dynamic_array_2(env, get_contract):
     code = """
 foo: DynArray[DynArray[DynArray[int128, 3], 3], 3]
diff --git a/tests/functional/codegen/features/test_immutable.py b/tests/functional/codegen/features/test_immutable.py
index 4707291662..0cca231182 100644
--- a/tests/functional/codegen/features/test_immutable.py
+++ b/tests/functional/codegen/features/test_immutable.py
@@ -1,7 +1,6 @@
 import pytest
 
 from vyper.compiler.settings import OptimizationLevel
-from vyper.exceptions import StackTooDeep
 
 
 @pytest.mark.parametrize(
@@ -199,7 +198,6 @@ def get_idx_two() -> uint256:
     assert c.get_idx_two() == expected_values[2][2]
 
 
-@pytest.mark.venom_xfail(raises=StackTooDeep, reason="stack scheduler regression")
 def test_nested_dynarray_immutable(get_contract):
     code = """
 my_list: immutable(DynArray[DynArray[DynArray[int128, 3], 3], 3])
diff --git a/tests/functional/codegen/features/test_transient.py b/tests/functional/codegen/features/test_transient.py
index 370e269cf9..2532def85b 100644
--- a/tests/functional/codegen/features/test_transient.py
+++ b/tests/functional/codegen/features/test_transient.py
@@ -2,7 +2,7 @@
 from tests.utils import ZERO_ADDRESS
 from vyper.compiler import compile_code
-from vyper.exceptions import EvmVersionException, StackTooDeep, VyperException
+from vyper.exceptions import EvmVersionException, VyperException
 
 pytestmark = pytest.mark.requires_evm_version("cancun")
 
@@ -343,7 +343,6 @@ def get_idx_two(_a: uint256, _b: uint256, _c: uint256) -> uint256:
     assert c.get_idx_two(*values) == expected_values[2][2]
 
 
-@pytest.mark.venom_xfail(raises=StackTooDeep, reason="stack scheduler regression")
 def test_nested_dynarray_transient(get_contract, tx_failed, env):
     set_list = """
         self.my_list = [
From 51bfe46f4ed4777c039a508266d9b5dac14dcc75 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 6 Nov 2025 21:48:35 +0200
Subject: [PATCH 07/11] refactor to use a StackSpiller class

---
 tests/unit/compiler/venom/test_stack_spill.py |  14 +-
 vyper/venom/stack_spiller.py                  | 242 +++++++++++++++++
 vyper/venom/venom_to_assembly.py              | 248 ++----------------
 3 files changed, 274 insertions(+), 230 deletions(-)
 create mode 100644 vyper/venom/stack_spiller.py

diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
index c653b28eb2..1d69a4ba70 100644
--- a/tests/unit/compiler/venom/test_stack_spill.py
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -3,6 +3,7 @@
 from vyper.venom.context import IRContext
 from vyper.venom.parser import parse_venom
 from vyper.venom.stack_model import StackModel
+from vyper.venom.stack_spiller import StackSpiller
 from vyper.venom.venom_to_assembly import VenomCompiler
 
 
@@ -38,7 +39,7 @@ def test_swap_spills_deep_stack() -> None:
     assert isinstance(depth, int) and depth < -16
     swap_idx = -depth
 
-    compiler.swap(assembly, stack, depth)
+    compiler.spiller.swap(assembly, stack, depth)
 
     expected = before.copy()
     top_index = len(expected) - 1
@@ -64,7 +65,7 @@ def test_dup_spills_deep_stack() -> None:
     assert isinstance(depth, int) and depth < -16
     dup_idx = 1 - depth
 
-    compiler.dup(assembly, stack, depth)
+    compiler.spiller.dup(assembly, stack, depth)
 
     expected = before.copy()
     expected.append(target)
@@ -77,10 +78,11 @@ def test_dup_spills_deep_stack() -> None:
 
 
 def test_stack_reorder_spills_before_swap() -> None:
-    compiler = VenomCompiler(IRContext())
+    ctx = IRContext()
+    compiler = VenomCompiler(ctx)
     compiler.dfg = _dummy_dfg()
-    compiler._spill_next_slot = 0
-    compiler._spill_free_slots = []
+
+    compiler.spiller = StackSpiller(ctx, initial_offset=0x10000)
 
     stack = StackModel()
     vars_on_stack = [IRVariable(f"%v{i}") for i in range(40)]
@@ -104,7 +106,7 @@ def test_stack_reorder_spills_before_swap() -> None:
     # restoring a spilled variable should reload it via MLOAD
     restore_assembly: list = []
     spilled_var = next(iter(spilled))
-    compiler._restore_spilled_operand(restore_assembly, stack, spilled, spilled_var)
+    compiler.spiller.restore_spilled_operand(restore_assembly, stack, spilled, spilled_var)
     restore_ops = _ops_only_strings(restore_assembly)
     assert restore_ops.count("MLOAD") == 1
     assert spilled_var not in spilled
diff --git a/vyper/venom/stack_spiller.py b/vyper/venom/stack_spiller.py
new file mode 100644
index 0000000000..ffc4d20155
--- /dev/null
+++ b/vyper/venom/stack_spiller.py
@@ -0,0 +1,242 @@
+from vyper.ir.compile_ir import PUSH
+from vyper.utils import MemoryPositions, OrderedSet
+from vyper.venom.basicblock import IRInstruction, IRLiteral, IROperand, IRVariable
+from vyper.venom.context import IRContext
+from vyper.venom.function import IRFunction
+from vyper.venom.stack_model import StackModel
+
+
+class StackSpiller:
+    """
+    Manages stack spilling operations for deep stacks. It handles:
+
+    - Spilling operands to memory
+    - Restoring spilled operands from memory
+    - Managing spill slot allocation and deallocation
+    """
+
+    def __init__(self, ctx: IRContext, initial_offset: int | None = None):
+        self.ctx = ctx
+        self._spill_free_slots: list[int] = []
+        self._spill_slot_offsets: dict[IRFunction, list[int]] = {}
+        self._spill_insert_index: dict[IRFunction, int] = {}
+        self._next_spill_offset = MemoryPositions.STACK_SPILL_BASE
+        if initial_offset is not None:
+            self._next_spill_offset = initial_offset
+        self._next_spill_alloca_id = 0
+        self._current_function: IRFunction | None = None
+
+    def set_current_function(self, fn: IRFunction | None) -> None:
+        """Set the current function being processed."""
+        self._current_function = fn
+        if fn is not None:
+            self._prepare_spill_state(fn)
+
+    def reset_spill_slots(self) -> None:
+        self._spill_free_slots = []
+
+    def _prepare_spill_state(self, fn: IRFunction) -> None:
+        if fn in self._spill_slot_offsets:
+            return
+
+        entry = fn.entry
+        insert_idx = 0
+        for inst in entry.instructions:
+            if inst.opcode == "param":
+                insert_idx += 1
+            else:
+                break
+
+        self._spill_slot_offsets[fn] = []
+        self._spill_insert_index[fn] = insert_idx
+
+    def spill_operand(
+        self,
+        assembly: list,
+        stack: StackModel,
+        spilled: dict[IROperand, int],
+        depth: int,
+        dry_run: bool = False,
+    ) -> None:
+        """Spill an operand from the stack to memory."""
+        operand = stack.peek(depth)
+        assert isinstance(operand, IRVariable), operand
+
+        if depth != 0:
+            self.swap(assembly, stack, depth, dry_run)
+
+        offset = self._get_spill_slot(dry_run)
+        assembly.extend(PUSH(offset))
+        assembly.append("MSTORE")
+        stack.pop()
+        spilled[operand] = offset
+
+    def restore_spilled_operand(
+        self,
+        assembly: list,
+        stack: StackModel,
+        spilled: dict[IROperand, int],
+        op: IRVariable,
+        dry_run: bool = False,
+    ) -> None:
+        """Restore a spilled operand from memory to the stack."""
+        offset = spilled.pop(op)
+        if not dry_run:
+            self._spill_free_slots.append(offset)
+        assembly.extend(PUSH(offset))
+        assembly.append("MLOAD")
+        stack.push(op)
+
+    def release_dead_spills(
+        self, spilled: dict[IROperand, int], live_set: OrderedSet[IRVariable]
+    ) -> None:
+        """Release memory slots for operands that are no longer live."""
+        for op in list(spilled.keys()):
+            if isinstance(op, IRVariable) and op in live_set:
+                continue
+            offset = spilled.pop(op)
+            self._spill_free_slots.append(offset)
+
+    def swap(self, assembly: list, stack: StackModel, depth: int, dry_run: bool = False) -> int:
+        """
+        Swap operation that handles deep stacks via spilling.
+
+        For swap depths greater than 16, spills the stack segment to memory,
+        then restores it in swapped order.
+        """
+        # Swapping the top is a no-op
+        if depth == 0:
+            return 0
+
+        swap_idx = -depth
+        if swap_idx < 1:
+            from vyper.exceptions import StackTooDeep
+
+            raise StackTooDeep(f"Unsupported swap depth {swap_idx}")
+
+        if swap_idx <= 16:
+            stack.swap(depth)
+            assembly.append(f"SWAP{swap_idx}")
+            return 1
+
+        # For deep stacks, use spill/restore technique
+        chunk_size = swap_idx + 1
+        spill_ops, offsets, cost = self._spill_stack_segment(assembly, stack, chunk_size, dry_run)
+
+        indices = list(range(chunk_size))
+        if chunk_size == 1:
+            desired_indices = indices
+        else:
+            desired_indices = [indices[-1]] + indices[1:-1] + [indices[0]]
+
+        cost += self._restore_spilled_segment(
+            assembly, stack, spill_ops, offsets, desired_indices, dry_run
+        )
+        return cost
+
+    def dup(self, assembly: list, stack: StackModel, depth: int, dry_run: bool = False) -> None:
+        """
+        Dup operation that handles deep stacks via spilling.
+
+        For dup depths greater than 16, spills the stack segment to memory,
+        then restores it with duplication.
+        """
+        dup_idx = 1 - depth
+        if dup_idx < 1:
+            from vyper.exceptions import StackTooDeep
+
+            raise StackTooDeep(f"Unsupported dup depth {dup_idx}")
+
+        if dup_idx <= 16:
+            stack.dup(depth)
+            assembly.append(f"DUP{dup_idx}")
+            return
+
+        # For deep stacks, use spill/restore technique
+        chunk_size = dup_idx
+        spill_ops, offsets, _ = self._spill_stack_segment(assembly, stack, chunk_size, dry_run)
+
+        indices = list(range(chunk_size))
+        desired_indices = [indices[-1]] + indices
+
+        self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices, dry_run)
+
+    def _spill_stack_segment(
+        self, assembly: list, stack: StackModel, count: int, dry_run: bool
+    ) -> tuple[list[IROperand], list[int], int]:
+        """Spill a segment of the stack to memory."""
+        spill_ops: list[IROperand] = []
+        offsets: list[int] = []
+        cost = 0
+
+        for _ in range(count):
+            op = stack.peek(0)
+            spill_ops.append(op)
+
+            offset = self._acquire_spill_offset(dry_run)
+            offsets.append(offset)
+
+            assembly.extend(PUSH(offset))
+            assembly.append("MSTORE")
+            stack.pop()
+            cost += 2
+
+        return spill_ops, offsets, cost
+
+    def _restore_spilled_segment(
+        self,
+        assembly: list,
+        stack: StackModel,
+        spill_ops: list[IROperand],
+        offsets: list[int],
+        desired_indices: list[int],
+        dry_run: bool,
+    ) -> int:
+        """Restore a spilled segment from memory to the stack."""
+        cost = 0
+
+        for idx in reversed(desired_indices):
+            assembly.extend(PUSH(offsets[idx]))
+            assembly.append("MLOAD")
+            stack.push(spill_ops[idx])
+            cost += 2
+
+        if not dry_run:
+            for offset in offsets:
+                self._spill_free_slots.append(offset)
+
+        return cost
+
+    def _get_spill_slot(self, dry_run: bool) -> int:
+        if dry_run:
+            # dry runs must not allocate: probe the next offset without advancing it
+            return self._next_spill_offset
+        if self._current_function is None:
+            offset = self._next_spill_offset
+            self._next_spill_offset += 32
+            return offset
+        return self._allocate_spill_slot(self._current_function)
+
+    def _acquire_spill_offset(self, dry_run: bool) -> int:
+        if self._spill_free_slots:
+            return self._spill_free_slots.pop() if not dry_run else self._spill_free_slots[-1]
+        return self._get_spill_slot(dry_run)
+
+    def _allocate_spill_slot(self, fn: IRFunction) -> int:
+        entry = fn.entry
+        insert_idx = self._spill_insert_index[fn]
+
+        offset = self._next_spill_offset
+        self._next_spill_offset += 32
+
+        offset_lit = IRLiteral(offset)
+        size_lit = IRLiteral(32)
+        id_lit = IRLiteral(self._next_spill_alloca_id)
+        self._next_spill_alloca_id += 1
+
+        output_var = fn.get_next_variable()
+
+        inst = IRInstruction("alloca", [offset_lit, size_lit, id_lit], output_var)
+        entry.instructions.insert(insert_idx, inst)
+        self._spill_insert_index[fn] += 1
+
+        self._spill_slot_offsets[fn].append(offset)
+        return offset
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 0109e689b3..6a61d2fca1 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -3,7 +3,7 @@
 from typing import Any, Iterable
 
 from vyper.evm.assembler.instructions import DATA_ITEM, PUSH, DataHeader
-from vyper.exceptions import CompilerPanic, StackTooDeep
+from vyper.exceptions import CompilerPanic
 from vyper.ir.compile_ir import (
     PUSH_OFST,
     PUSHLABEL,
@@ -26,6 +26,7 @@
 )
 from vyper.venom.context import IRContext, IRFunction
 from vyper.venom.stack_model import StackModel
+from vyper.venom.stack_spiller import StackSpiller
 
 DEBUG_SHOW_COST = False
 if DEBUG_SHOW_COST:
@@ -153,12 +154,7 @@ def __init__(self, ctx: IRContext):
         self.ctx = ctx
         self.label_counter = 0
         self.visited_basicblocks = OrderedSet()
-        self._spill_free_slots: list[int] = []
-        self._spill_slot_offsets: dict[IRFunction, list[int]] = {}
-        self._spill_insert_index: dict[IRFunction, int] = {}
-        self._next_spill_offset = MemoryPositions.STACK_SPILL_BASE
-        self._next_spill_alloca_id = 0
-        self._current_function: IRFunction | None = None
+        self.spiller = StackSpiller(ctx)
 
     def mklabel(self, name: str) -> Label:
         self.label_counter += 1
@@ -179,12 +175,11 @@ def generate_evm_assembly(self, no_optimize: bool = False) -> list[AssemblyInstr
 
         assert self.cfg.is_normalized(), "Non-normalized CFG!"
 
-        self._current_function = fn
-        self._prepare_spill_state(fn)
-        self._spill_free_slots = []
+        self.spiller.set_current_function(fn)
+        self.spiller.reset_spill_slots()
 
         self._generate_evm_for_basicblock_r(asm, fn.entry, StackModel(), {})
-        self._current_function = None
+        self.spiller.set_current_function(None)
 
         asm.extend(_REVERT_POSTAMBLE)
 
     # Append data segment
@@ -219,7 +214,7 @@ def _stack_reorder(
             assert len(assembly) == 0, "Dry run should not work on assembly"
             stack = stack.copy()
             spilled = spilled.copy()
-            spill_free_snapshot = self._spill_free_slots.copy()
+            spill_free_snapshot = self.spiller._spill_free_slots.copy()
         else:
             spill_free_snapshot = []
 
@@ -238,7 +233,9 @@ def _stack_reorder(
 
             if depth == StackModel.NOT_IN_STACK:
                 if isinstance(op, IRVariable) and op in spilled:
-                    self._restore_spilled_operand(assembly, stack, spilled, op, dry_run=dry_run)
+                    self.spiller.restore_spilled_operand(
+                        assembly, stack, spilled, op, dry_run=dry_run
+                    )
                    depth = stack.get_depth(op)
                 else:
                     raise CompilerPanic(f"Variable {op} not in stack")
@@ -261,13 +258,13 @@ def _stack_reorder(
                 stack.poke(depth, to_swap)
                 continue
 
-            cost += self.swap(assembly, stack, depth, dry_run)
-            cost += self.swap(assembly, stack, final_stack_depth, dry_run)
+            cost += self.spiller.swap(assembly, stack, depth, dry_run)
+            cost += self.spiller.swap(assembly, stack, final_stack_depth, dry_run)
 
         assert stack._stack[-len(stack_ops) :] == stack_ops, (stack, stack_ops)
 
         if dry_run:
-            self._spill_free_slots = spill_free_snapshot
+            self.spiller._spill_free_slots = spill_free_snapshot
 
         return cost
 
@@ -285,11 +282,13 @@ def _reduce_depth_via_spill(
             candidate_depth = self._select_spill_candidate(stack, stack_ops, depth)
             if candidate_depth is None:
                 return False
-            self._spill_operand(assembly, stack, spilled, candidate_depth, dry_run)
+            self.spiller.spill_operand(assembly, stack, spilled, candidate_depth, dry_run)
             depth = stack.get_depth(target_op)
             if depth == StackModel.NOT_IN_STACK:
                 if isinstance(target_op, IRVariable) and target_op in spilled:
-                    self._restore_spilled_operand(assembly, stack, spilled, target_op, dry_run)
+                    self.spiller.restore_spilled_operand(
+                        assembly, stack, spilled, target_op, dry_run
+                    )
                     depth = stack.get_depth(target_op)
                 else:
                     return False
@@ -312,58 +311,6 @@ def _select_spill_candidate(
             return depth
         return None
 
-    def _spill_operand(
-        self,
-        assembly: list,
-        stack: StackModel,
-        spilled: dict[IROperand, int],
-        depth: int,
-        dry_run: bool,
-    ) -> None:
-        operand = stack.peek(depth)
-        assert isinstance(operand, IRVariable), operand
-
-        if depth != 0:
-            self.swap(assembly, stack, depth, dry_run)
-
-        offset = self._get_spill_slot(operand, spilled, dry_run)
-        assembly.extend(PUSH(offset))
-        assembly.append("MSTORE")
-        stack.pop()
-        spilled[operand] = offset
-
-    def _restore_spilled_operand(
-        self,
-        assembly: list,
-        stack: StackModel,
-        spilled: dict[IROperand, int],
-        op: IRVariable,
-        dry_run: bool = False,
-    ) -> None:
-        offset = spilled.pop(op)
-        if not dry_run:
-            self._spill_free_slots.append(offset)
-        assembly.extend(PUSH(offset))
-        assembly.append("MLOAD")
-        stack.push(op)
-
-    def _get_spill_slot(
-        self, operand: IRVariable, spilled: dict[IROperand, int], dry_run: bool
-    ) -> int:
-        if operand in spilled:
-            return spilled[operand]
-        offset = self._acquire_spill_offset(dry_run)
-        return offset
-
-    def _release_dead_spills(
-        self, spilled: dict[IROperand, int], live_set: OrderedSet[IRVariable]
-    ) -> None:
-        for op in list(spilled.keys()):
-            if isinstance(op, IRVariable) and op in live_set:
-                continue
-            offset = spilled.pop(op)
-            self._spill_free_slots.append(offset)
-
     def _emit_input_operands(
         self,
         assembly: list,
@@ -383,7 +330,7 @@ def _emit_input_operands(
 
         for op in ops:
             if isinstance(op, IRVariable) and op in spilled:
-                self._restore_spilled_operand(assembly, stack, spilled, op)
+                self.spiller.restore_spilled_operand(assembly, stack, spilled, op)
 
             if isinstance(op, IRLabel):
                 # invoke emits the actual instruction itself so we don't need
@@ -436,51 +383,6 @@ def _prepare_stack_for_function(self, asm, fn: IRFunction, stack: StackModel):
 
         self._optimistic_swap(asm, last_param, next_liveness, stack)
 
-    def _prepare_spill_state(self, fn: IRFunction) -> None:
-        if fn in self._spill_slot_offsets:
-            return
-
-        entry = fn.entry
-        insert_idx = 0
-        for inst in entry.instructions:
-            if inst.opcode == "param":
-                insert_idx += 1
-            else:
-                break
-
-        self._spill_slot_offsets[fn] = []
-        self._spill_insert_index[fn] = insert_idx
-
-    def _allocate_spill_slot(self, fn: IRFunction) -> int:
-        entry = fn.entry
-        insert_idx = self._spill_insert_index[fn]
-
-        offset = self._next_spill_offset
-        self._next_spill_offset += 32
-
-        offset_lit = IRLiteral(offset)
-        size_lit = IRLiteral(32)
-        id_lit = IRLiteral(self._next_spill_alloca_id)
-        self._next_spill_alloca_id += 1
-
-        output_var = fn.get_next_variable()
-        inst = IRInstruction("alloca", [offset_lit, size_lit, id_lit], output_var)
-        entry.insert_instruction(inst, insert_idx)
-        self._spill_insert_index[fn] += 1
-        self._spill_slot_offsets[fn].append(offset)
-        return offset
-
-    def _acquire_spill_offset(self, dry_run: bool) -> int:
-        if self._spill_free_slots:
-            return self._spill_free_slots.pop()
-        if dry_run:
-            return 0
-        if self._current_function is None:
-            offset = self._next_spill_offset
-            self._next_spill_offset += 32
-            return offset
-        return self._allocate_spill_slot(self._current_function)
-
     def popmany(self, asm, to_pop: Iterable[IRVariable], stack):
         to_pop = list(to_pop)
         # small heuristic: pop from shallowest first.
@@ -492,7 +394,7 @@ def popmany(self, asm, to_pop: Iterable[IRVariable], stack):
 
             depth = stack.get_depth(var)
             if depth != 0:
-                self.swap(asm, stack, depth)
+                self.spiller.swap(asm, stack, depth)
             self.pop(asm, stack)
 
     def _generate_evm_for_basicblock_r(
@@ -616,7 +518,7 @@ def _generate_evm_for_instruction(
             if to_be_replaced in next_liveness:
                 # this branch seems unreachable (maybe due to make_ssa)
                 # %13/%14 is still live(!), so we make a copy of it
-                self.dup(assembly, stack, depth)
+                self.spiller.dup(assembly, stack, depth)
                 stack.poke(0, ret)
             else:
                 stack.poke(depth, ret)
@@ -778,7 +680,7 @@ def _generate_evm_for_instruction(
         else:
             self._optimistic_swap(assembly, inst, next_liveness, stack)
 
-        self._release_dead_spills(spilled, next_liveness)
+        self.spiller.release_dead_spills(spilled, next_liveness)
 
         return apply_line_numbers(inst, assembly)
 
@@ -802,7 +704,7 @@ def _optimistic_swap(self, assembly, inst, next_liveness, stack):
         if not self.dfg.are_equivalent(inst.output, next_scheduled):
             depth = stack.get_depth(next_scheduled)
             if depth is not StackModel.NOT_IN_STACK:
-                cost = self.swap(assembly, stack, depth)
+                cost = self.spiller.swap(assembly, stack, depth)
 
         if DEBUG_SHOW_COST and cost != 0:
             print("ENTER", inst, file=sys.stderr)
@@ -814,114 +716,12 @@ def pop(self, assembly, stack, num=1):
         stack.pop(num)
         assembly.extend(["POP"] * num)
 
-    def _spill_stack_segment(
-        self, assembly, stack, count: int, dry_run: bool
-    ) -> tuple[list[IROperand], list[int], int]:
-        spill_ops: list[IROperand] = []
-        offsets: list[int] = []
-        cost = 0
-
-        for _ in range(count):
-            op = stack.peek(0)
-            spill_ops.append(op)
-
-            offset = self._acquire_spill_offset(dry_run)
-            offsets.append(offset)
-
-            assembly.extend(PUSH(offset))
-            assembly.append("MSTORE")
-            stack.pop()
-            cost += 2
-
-        return spill_ops, offsets, cost
-
-    def _restore_spilled_segment(
-        self,
-        assembly,
-        stack,
-        spill_ops: list[IROperand],
-        offsets: list[int],
-        desired_indices: list[int],
-        dry_run: bool,
-    ) -> int:
-        cost = 0
-
-        for idx in reversed(desired_indices):
-            assembly.extend(PUSH(offsets[idx]))
-            assembly.append("MLOAD")
-            stack.push(spill_ops[idx])
-            cost += 2
-
-        if not dry_run:
-            for offset in offsets:
-                self._spill_free_slots.append(offset)
-
-        return cost
-
-    def swap(self, assembly, stack, depth, dry_run: bool = False) -> int:
-        # Swaps of the top is no op
-        if depth == 0:
-            return 0
-
-        swap_idx = -depth
-        if swap_idx < 1:
-            raise StackTooDeep(f"Unsupported swap depth {swap_idx}")
-        if swap_idx <= 16:
-            stack.swap(depth)
-            assembly.append(_evm_swap_for(depth))
-            return 1
-
-        chunk_size = swap_idx + 1
-        spill_ops, offsets, cost = self._spill_stack_segment(assembly, stack, chunk_size, dry_run)
-
-        indices = list(range(chunk_size))
-        if chunk_size == 1:
-            desired_indices = indices
-        else:
-            desired_indices = [indices[-1]] + indices[1:-1] + [indices[0]]
-
-        cost += self._restore_spilled_segment(
-            assembly, stack, spill_ops, offsets, desired_indices, dry_run
-        )
-        return cost
-
-    def dup(self, assembly, stack, depth, dry_run: bool = False):
-        dup_idx = 1 - depth
-        if dup_idx < 1:
-            raise StackTooDeep(f"Unsupported dup depth {dup_idx}")
-        if dup_idx <= 16:
-            stack.dup(depth)
-            assembly.append(_evm_dup_for(depth))
-            return
-
-        chunk_size = dup_idx
-        spill_ops, offsets, _ = self._spill_stack_segment(assembly, stack, chunk_size, dry_run)
-
-        indices = list(range(chunk_size))
-        desired_indices = [indices[-1]] + indices
-
-        self._restore_spilled_segment(assembly, stack, spill_ops, offsets, desired_indices, dry_run)
-
     def swap_op(self, assembly, stack, op):
         depth = stack.get_depth(op)
         assert depth is not StackModel.NOT_IN_STACK, f"Cannot swap non-existent operand {op}"
-        return self.swap(assembly, stack, depth)
+        return self.spiller.swap(assembly, stack, depth)
 
     def dup_op(self, assembly, stack, op):
         depth = stack.get_depth(op)
         assert depth is not StackModel.NOT_IN_STACK, f"Cannot dup non-existent operand {op}"
-        self.dup(assembly, stack, depth)
-
-
-def _evm_swap_for(depth: int) -> str:
-    swap_idx = -depth
-    if not (1 <= swap_idx <= 16):
-        raise StackTooDeep(f"Unsupported swap depth {swap_idx}")
-    return f"SWAP{swap_idx}"
-
-
-def _evm_dup_for(depth: int) -> str:
-    dup_idx = 1 - depth
-    if not (1 <= dup_idx <= 16):
-        raise StackTooDeep(f"Unsupported dup depth {dup_idx}")
-    return f"DUP{dup_idx}"
+        self.spiller.dup(assembly, stack, depth)
From 51e09d1ce93ca7d89811ea65d409a7ede66246e2 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 6 Nov 2025 22:21:08 +0200
Subject: [PATCH 08/11] tests

---
 tests/unit/compiler/venom/test_stack_spill.py | 41 +++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/unit/compiler/venom/test_stack_spill.py b/tests/unit/compiler/venom/test_stack_spill.py
index 1d69a4ba70..361ee2cc42 100644
--- a/tests/unit/compiler/venom/test_stack_spill.py
+++ b/tests/unit/compiler/venom/test_stack_spill.py
@@ -1,3 +1,5 @@
+import pytest
+
 from vyper.ir.compile_ir import Label
 from vyper.venom.basicblock import IRLiteral, IRVariable
 from vyper.venom.context import IRContext
@@ -218,3 +220,42 @@ def _find_spill_ops(kind: str) -> list[int]:
     assert any(idx > join_idx for idx in store_indices)
     assert any(idx < join_idx for idx in load_indices)
     assert any(idx > join_idx for idx in load_indices)
+
+
+def test_dup_op_operand_not_in_stack() -> None:
+    compiler = VenomCompiler(IRContext())
+    stack = StackModel()
+    assembly: list = []
+
+    ops = [IRVariable(f"%{i}") for i in range(5)]
+    for op in ops:
+        stack.push(op)
+
+    not_in_stack = IRVariable("%99")
+
+    with pytest.raises(AssertionError):
+        compiler.dup_op(assembly, stack, not_in_stack)
+
+
+def test_stack_reorder_operand_not_in_stack_but_spilled() -> None:
+    ctx = IRContext()
+    compiler = VenomCompiler(ctx)
+    compiler.dfg = _dummy_dfg()
+
+    stack = StackModel()
+    for i in range(5):
+        stack.push(IRVariable(f"%{i}"))
+
+    spilled_var = IRVariable("%spilled")
+    spilled: dict = {spilled_var: 0x10000}
+
+    assembly: list = []
+
+    # Try to reorder with spilled_var as target (should restore it from memory)
+    compiler._stack_reorder(assembly, stack, [spilled_var], spilled, dry_run=False)
+
+    # Should have restored the spilled variable
+    assert stack.get_depth(spilled_var) == 0  # should be on top of the stack
+    assert spilled_var not in spilled  # should have been removed from the spilled dict
+    # Assembly should contain PUSH and MLOAD to restore
+    assert "MLOAD" in assembly
From b689a97fb5b92ab5bf25749a5f41ea4a0439e655 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 6 Nov 2025 22:45:13 +0200
Subject: [PATCH 09/11] remove variable only used in dry_run

---
 vyper/venom/venom_to_assembly.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 6a61d2fca1..0e1ac4a9c9 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -215,8 +215,6 @@ def _stack_reorder(
             stack = stack.copy()
             spilled = spilled.copy()
             spill_free_snapshot = self.spiller._spill_free_slots.copy()
-        else:
-            spill_free_snapshot = []
 
         if len(stack_ops) == 0:
             return 0
From 9b813c8ca2d8b96442d1f7dbac99fd3aab26a4b9 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 27 Nov 2025 20:11:12 +0200
Subject: [PATCH 10/11] fix

---
 vyper/venom/venom_to_assembly.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index f864a77931..4843aa1caf 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -392,7 +392,7 @@ def popmany(self, asm, to_pop: Iterable[IRVariable], stack):
         deepest = min(depths)
         expected = list(range(deepest, 0))
         if deepest < 0 and -deepest <= 16 and sorted(depths) == expected:
-            self.swap(asm, stack, deepest)
+            self.spiller.swap(asm, stack, deepest)
             self.pop(asm, stack, len(to_pop))
             return
From ad8215bb33fdb8981c4d8bb86af3c9275256bd78 Mon Sep 17 00:00:00 2001
From: Harry Kalogirou
Date: Thu, 27 Nov 2025 20:12:43 +0200
Subject: [PATCH 11/11] update code

---
 vyper/venom/stack_spiller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vyper/venom/stack_spiller.py b/vyper/venom/stack_spiller.py
index ffc4d20155..6170c9add1 100644
--- a/vyper/venom/stack_spiller.py
+++ b/vyper/venom/stack_spiller.py
@@ -234,7 +234,7 @@ def _allocate_spill_slot(self, fn: IRFunction) -> int:
 
         output_var = fn.get_next_variable()
 
-        inst = IRInstruction("alloca", [offset_lit, size_lit, id_lit], output_var)
+        inst = IRInstruction("alloca", [offset_lit, size_lit, id_lit], [output_var])
         entry.instructions.insert(insert_idx, inst)
         self._spill_insert_index[fn] += 1
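
After the full series, deep stack manipulation is routed through StackSpiller.
A usage sketch in the spirit of the unit tests above (it assumes only the
venom APIs exercised in this series):

    from vyper.venom.basicblock import IRLiteral
    from vyper.venom.context import IRContext
    from vyper.venom.stack_model import StackModel
    from vyper.venom.stack_spiller import StackSpiller

    spiller = StackSpiller(IRContext())
    stack = StackModel()
    ops = [IRLiteral(i) for i in range(40)]
    for op in ops:
        stack.push(op)

    assembly: list = []
    target = ops[-18]                     # 17 below the top: beyond SWAP16 reach
    depth = stack.get_depth(target)
    spiller.swap(assembly, stack, depth)  # emits the PUSH/MSTORE ... PUSH/MLOAD spill sequence

    assert stack.peek(0) == target
    assert all(
        int(op[4:]) <= 16
        for op in assembly
        if isinstance(op, str) and op.startswith("SWAP")
    )

With no current function set, offsets come straight from the scratch arena at
STACK_SPILL_BASE; inside a function the spiller instead reserves each slot
with an alloca in the entry block, as introduced in PATCH 04.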