From 91c9035145e23e33585fd887b175cb4985f71733 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 14:29:33 +0200
Subject: [PATCH 01/24] draft - memory fuzzer for venom

---
 tests/functional/venom/test_memory_fuzzer.py | 434 +++++++++++++++++++
 1 file changed, 434 insertions(+)
 create mode 100644 tests/functional/venom/test_memory_fuzzer.py

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
new file mode 100644
index 0000000000..bde2174c8c
--- /dev/null
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -0,0 +1,434 @@
+"""
+Memory fuzzer for Venom IR.
+
+This fuzzer generates complex control flow with memory instructions to test
+memory optimization passes. It uses the IRBasicBlock API directly and
+can be plugged with any Venom passes.
+"""
+
+import pytest
+import hypothesis as hp
+import hypothesis.strategies as st
+from typing import List, Optional, Set
+
+from tests.venom_utils import PrePostChecker
+from tests.hevm import hevm_check_venom_ctx
+from vyper.venom.analysis import IRAnalysesCache
+from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRVariable, IRLiteral, IRLabel
+from vyper.venom.context import IRContext
+from vyper.venom.function import IRFunction
+from vyper.venom.passes.base_pass import IRPass
+
+# Memory operations that can be fuzzed
+MEMORY_OPS = ["mload", "mstore", "mcopy"]
+
+# Precompile addresses for fence operations that generate real data
+PRECOMPILES = {
+    0x1: "ecrecover",      # Returns 32 bytes
+    0x2: "sha256",         # Returns 32 bytes  
+    0x3: "ripemd160",      # Returns 32 bytes
+    0x4: "identity",       # Returns input data
+    0x5: "modexp",         # Returns variable length
+    0x6: "ecadd",          # Returns 64 bytes
+    0x7: "ecmul",          # Returns 64 bytes
+    0x8: "ecpairing",      # Returns 32 bytes
+    0x9: "blake2f",        # Returns 64 bytes
+}
+
+# Constants for fuzzing
+MAX_MEMORY_SIZE = 4096  # Limit memory to 4096 bytes
+MAX_BASIC_BLOCKS = 8
+MAX_INSTRUCTIONS_PER_BLOCK = 8
+MAX_VARIABLES = 20
+
+
+class MemoryFuzzer:
+    """Generates random Venom IR with memory operations using IRBasicBlock API."""
+    
+    def __init__(self, seed_memory: bool = True, allow_params: bool = True):
+        self.seed_memory = seed_memory
+        self.allow_params = allow_params
+        self.ctx = IRContext()
+        self.function = None
+        self.variable_counter = 0
+        self.bb_counter = 0
+        self.available_vars = []  # Variables available for use
+        
+    def get_next_variable(self) -> IRVariable:
+        """Generate a new unique variable."""
+        self.variable_counter += 1
+        var = IRVariable(f"v{self.variable_counter}")
+        self.available_vars.append(var)
+        return var
+    
+    def get_next_bb_label(self) -> IRLabel:
+        """Generate a new unique basic block label.""" 
+        self.bb_counter += 1
+        return IRLabel(f"bb{self.bb_counter}")
+    
+    def get_random_variable(self, draw) -> IRVariable:
+        """Get a random available variable or create a new one."""
+        if self.available_vars and draw(st.booleans()):
+            return draw(st.sampled_from(self.available_vars))
+        else:
+            return self.get_next_variable()
+    
+    def get_memory_address(self, draw) -> IRVariable | IRLiteral:
+        """Get a memory address (either variable or aligned literal)."""
+        if self.available_vars and draw(st.booleans()):
+            return draw(st.sampled_from(self.available_vars))
+        else:
+            # Generate aligned memory addresses (multiples of 32)
+            addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32)) & ~31
+            return IRLiteral(addr)
+
+
+@st.composite  
+def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict:
+    """Generate a complex control flow graph structure."""
+    num_blocks = draw(st.integers(min_value=2, max_value=max_blocks))
+    
+    # Create adjacency list representation
+    # Block 0 is always the entry, highest numbered block is always the exit
+    edges = {}
+    
+    for i in range(num_blocks):
+        edges[i] = []
+    
+    # Ensure connectivity: each block (except exit) has at least one outgoing edge
+    for i in range(num_blocks - 1):
+        # Add at least one outgoing edge to ensure no dead blocks
+        if i == num_blocks - 2:
+            # Second-to-last block must connect to exit
+            edges[i].append(num_blocks - 1)
+        else:
+            # Can connect to any later block 
+            target = draw(st.integers(min_value=i + 1, max_value=num_blocks - 1))
+            edges[i].append(target)
+    
+    # Add some additional random edges for complexity
+    for i in range(num_blocks - 1):
+        # Chance to add more outgoing edges
+        if draw(st.booleans()):
+            # Don't create too many edges
+            max_additional = min(2, num_blocks - i - 2)
+            if max_additional > 0:
+                num_additional = draw(st.integers(min_value=0, max_value=max_additional))
+                for _ in range(num_additional):
+                    # Choose a target we're not already connected to
+                    possible_targets = [j for j in range(i + 1, num_blocks) if j not in edges[i]]
+                    if possible_targets:
+                        target = draw(st.sampled_from(possible_targets))
+                        edges[i].append(target)
+    
+    return {"num_blocks": num_blocks, "edges": edges}
+
+
+@st.composite
+def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
+    """Generate and append a memory instruction to current basic block."""
+    op = draw(st.sampled_from(MEMORY_OPS))
+    bb = fuzzer.current_bb
+    
+    if op == "mload":
+        # %result = mload %addr
+        addr = fuzzer.get_memory_address(draw)
+        result_var = bb.append_instruction("mload", addr)
+        
+    elif op == "mstore":
+        # mstore %value, %addr
+        value = fuzzer.get_random_variable(draw) if fuzzer.available_vars else IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1)))
+        addr = fuzzer.get_memory_address(draw)
+        bb.append_instruction("mstore", value, addr)
+        
+    elif op == "mcopy":
+        # mcopy %dest, %src, %length
+        dest = fuzzer.get_memory_address(draw)
+        src = fuzzer.get_memory_address(draw)
+        length = IRLiteral(32)  # Copy 32 bytes
+        bb.append_instruction("mcopy", dest, src, length)
+
+
+@st.composite
+def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
+    """Generate a call to a precompile that produces real output data."""
+    bb = fuzzer.current_bb
+    
+    # Choose a precompile
+    precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys())))
+    precompile_name = PRECOMPILES[precompile_addr]
+    
+    # Set up input data in memory
+    input_offset = fuzzer.get_memory_address(draw)
+    output_offset = fuzzer.get_memory_address(draw)
+    
+    if precompile_name == "identity":
+        # Identity precompile - copies input to output
+        input_size = IRLiteral(32)
+        output_size = IRLiteral(32)
+    elif precompile_name == "sha256":
+        # SHA256 - takes any input, outputs 32 bytes
+        input_size = IRLiteral(64)  # Use 64 bytes input
+        output_size = IRLiteral(32)
+    elif precompile_name == "blake2f":
+        # Blake2f - outputs 64 bytes
+        input_size = IRLiteral(213)  # Blake2f requires 213 bytes input
+        output_size = IRLiteral(64)
+    elif precompile_name in ["ecadd", "ecmul"]:
+        # EC operations - specific input/output sizes
+        input_size = IRLiteral(96)  # EC point operations
+        output_size = IRLiteral(64)
+    else:
+        # Default case
+        input_size = IRLiteral(32)
+        output_size = IRLiteral(32)
+    
+    # Call the precompile
+    gas = IRLiteral(100000)  # Plenty of gas
+    addr = IRLiteral(precompile_addr)
+    value = IRLiteral(0)
+    
+    result_var = bb.append_instruction("staticcall", gas, addr, input_offset, input_size, output_offset, output_size)
+
+
+@st.composite
+def seed_memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
+    """Generate an instruction that seeds memory with data."""
+    bb = fuzzer.current_bb
+    
+    if fuzzer.allow_params:
+        # Use calldataload to get "random" data from parameters
+        offset = IRLiteral(draw(st.integers(min_value=0, max_value=256, step=32)))
+        data_var = bb.append_instruction("calldataload", offset)
+        
+        # Store it in memory
+        mem_addr = fuzzer.get_memory_address(draw)
+        bb.append_instruction("mstore", data_var, mem_addr)
+    else:
+        # Just store a literal value
+        value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1)))
+        mem_addr = fuzzer.get_memory_address(draw)
+        bb.append_instruction("mstore", value, mem_addr)
+
+
+@st.composite
+def basic_block_instructions(draw, fuzzer: MemoryFuzzer, is_entry: bool = False) -> None:
+    """Generate instructions for a basic block."""
+    
+    # For entry block, seed some memory first
+    if is_entry and fuzzer.seed_memory:
+        num_seeds = draw(st.integers(min_value=1, max_value=3))
+        for _ in range(num_seeds):
+            draw(seed_memory_instruction(fuzzer))
+    
+    # Generate main instructions
+    num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK))
+    
+    for _ in range(num_instructions):
+        # Choose instruction type
+        inst_type = draw(st.sampled_from(["memory", "precompile", "seed"]))
+        
+        if inst_type == "memory":
+            draw(memory_instruction(fuzzer))
+        elif inst_type == "precompile": 
+            draw(precompile_call(fuzzer))
+        elif inst_type == "seed":
+            draw(seed_memory_instruction(fuzzer))
+
+
+@st.composite
+def venom_function_with_memory_ops(draw) -> IRContext:
+    """Generate a complete Venom IR function using IRBasicBlock API."""
+    
+    fuzzer = MemoryFuzzer(seed_memory=True, allow_params=True)
+    
+    # Create function
+    func_name = IRLabel("_fuzz_function", is_symbol=True)
+    fuzzer.function = IRFunction(func_name, fuzzer.ctx)
+    fuzzer.ctx.functions[func_name] = fuzzer.function
+    fuzzer.ctx.entry_function = fuzzer.function
+    
+    # Generate control flow structure
+    cfg = draw(control_flow_graph())
+    num_blocks = cfg["num_blocks"]
+    edges = cfg["edges"]
+    
+    # Create all basic blocks first
+    basic_blocks = []
+    for i in range(num_blocks):
+        if i == 0:
+            label = IRLabel("entry")
+        else:
+            label = fuzzer.get_next_bb_label()
+        
+        bb = IRBasicBlock(label, fuzzer.function)
+        fuzzer.function._basic_block_dict[label.value] = bb
+        basic_blocks.append(bb)
+    
+    # Set entry block
+    fuzzer.function.entry = basic_blocks[0]
+    
+    # Generate instructions for each block
+    for i, bb in enumerate(basic_blocks):
+        fuzzer.current_bb = bb
+        
+        # Generate block content
+        is_entry = (i == 0)
+        draw(basic_block_instructions(fuzzer, is_entry=is_entry))
+        
+        # Add terminator instruction
+        outgoing_edges = edges[i]
+        
+        if i == num_blocks - 1:
+            # Exit block - return memory contents
+            bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0))
+        elif len(outgoing_edges) == 1:
+            # Single outgoing edge - unconditional jump
+            target_bb = basic_blocks[outgoing_edges[0]]
+            bb.append_instruction("jmp", target_bb.label)
+        elif len(outgoing_edges) == 2:
+            # Two outgoing edges - conditional jump
+            # Create condition based on memory contents or available variable
+            if fuzzer.available_vars:
+                cond_var = draw(st.sampled_from(fuzzer.available_vars))
+            else:
+                # Load something from memory as condition
+                cond_var = bb.append_instruction("mload", IRLiteral(0))
+            
+            target1_bb = basic_blocks[outgoing_edges[0]]
+            target2_bb = basic_blocks[outgoing_edges[1]]
+            bb.append_instruction("jnz", target1_bb.label, target2_bb.label, cond_var)
+        else:
+            # Multiple edges - use djmp (dynamic jump table)
+            if fuzzer.available_vars:
+                selector_var = draw(st.sampled_from(fuzzer.available_vars))
+            else:
+                selector_var = bb.append_instruction("mload", IRLiteral(0))
+            
+            # Create jump table
+            target_labels = [basic_blocks[edge].label for edge in outgoing_edges]
+            bb.append_instruction("djmp", selector_var, *target_labels)
+    
+    return fuzzer.ctx
+
+
+class MemoryFuzzChecker:
+    """A pluggable checker for memory passes using fuzzing."""
+    
+    def __init__(self, passes: List[type], post_passes: List[type] = None):
+        self.passes = passes
+        self.post_passes = post_passes or []
+    
+    def check_memory_equivalence(self, ctx: IRContext) -> bool:
+        """
+        Check that memory passes preserve semantics by comparing execution.
+        
+        Returns True if optimized and unoptimized versions are equivalent.
+        """
+        try:
+            # Deep copy the context for optimization
+            import copy
+            unoptimized_ctx = copy.deepcopy(ctx)
+            optimized_ctx = copy.deepcopy(ctx)
+            
+            # Apply passes to optimized version
+            for fn in optimized_ctx.functions.values():
+                ac = IRAnalysesCache(fn)
+                for pass_class in self.passes:
+                    pass_obj = pass_class(ac, fn)
+                    pass_obj.run_pass()
+                
+                # Apply post passes
+                for pass_class in self.post_passes:
+                    pass_obj = pass_class(ac, fn)
+                    pass_obj.run_pass()
+            
+            # Use hevm to check equivalence if available
+            try:
+                hevm_check_venom_ctx(unoptimized_ctx, optimized_ctx)
+                return True
+            except Exception as e:
+                # If hevm fails, we assume the optimization broke semantics
+                hp.note(f"HEVM equivalence check failed: {e}")
+                return False
+                
+        except Exception as e:
+            # If optimization fails, skip this test case
+            hp.note(f"Optimization failed: {e}")
+            hp.assume(False)
+            return False
+
+
+# Test with memory-related passes
+@pytest.mark.fuzzing  
+@pytest.mark.parametrize("pass_list", [
+    # Test individual memory passes
+    [__import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass],
+    [__import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass],
+    
+    # Test combinations  
+    [
+        __import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass,
+        __import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass,
+    ],
+])
+@hp.given(ctx=venom_function_with_memory_ops())
+@hp.settings(
+    max_examples=100,
+    suppress_health_check=(
+        hp.HealthCheck.data_too_large,
+        hp.HealthCheck.too_slow,
+        hp.HealthCheck.filter_too_much,
+    ),
+    deadline=None,
+)
+def test_memory_passes_fuzzing(pass_list, ctx):
+    """
+    Property-based test for memory optimization passes.
+    
+    Tests that memory passes preserve semantics by comparing execution
+    between optimized and unoptimized versions.
+    """
+    hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
+    
+    # Log the generated IR for debugging
+    if hasattr(ctx, 'functions') and ctx.functions:
+        func = list(ctx.functions.values())[0]
+        hp.note(f"Generated function with {len(func._basic_block_dict)} basic blocks")
+        for bb_name, bb in func._basic_block_dict.items():
+            hp.note(f"Block {bb_name}: {len(bb.instructions)} instructions")
+    
+    checker = MemoryFuzzChecker(pass_list)
+    
+    # The property we're testing: optimization should preserve semantics
+    assert checker.check_memory_equivalence(ctx), "Memory optimization broke semantics"
+
+
+# Utility function for manual testing
+def generate_sample_ir() -> IRContext:
+    """Generate a sample IR for manual inspection."""
+    import random
+    random.seed(42)
+    
+    # Create a hypothesis example
+    ctx = venom_function_with_memory_ops().example()
+    return ctx
+
+
+if __name__ == "__main__":
+    # Example usage
+    ctx = generate_sample_ir()
+    
+    if ctx and ctx.functions:
+        func = list(ctx.functions.values())[0]
+        print(f"Generated function with {len(func._basic_block_dict)} basic blocks:")
+        print(func)
+        
+        # Test with a simple pass
+        try:
+            from vyper.venom.passes.load_elimination import LoadEliminationPass
+            checker = MemoryFuzzChecker([LoadEliminationPass])
+            result = checker.check_memory_equivalence(ctx)
+            print(f"\nEquivalence check result: {result}")
+        except ImportError:
+            print("Could not import LoadEliminationPass for testing")
\ No newline at end of file

From ebdac4df8fc49eee723463b48f5697f707345955 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 15:04:04 +0200
Subject: [PATCH 02/24] improvements / tuning

---
 tests/functional/venom/test_memory_fuzzer.py | 290 +++++++++++--------
 1 file changed, 171 insertions(+), 119 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index bde2174c8c..eda506eb3b 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -6,15 +6,16 @@
 can be plugged with any Venom passes.
 """
 
-import pytest
+from typing import List, Optional, Set
+
 import hypothesis as hp
 import hypothesis.strategies as st
-from typing import List, Optional, Set
+import pytest
 
-from tests.venom_utils import PrePostChecker
 from tests.hevm import hevm_check_venom_ctx
+from tests.venom_utils import PrePostChecker
 from vyper.venom.analysis import IRAnalysesCache
-from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRVariable, IRLiteral, IRLabel
+from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
 from vyper.venom.passes.base_pass import IRPass
@@ -24,15 +25,15 @@
 
 # Precompile addresses for fence operations that generate real data
 PRECOMPILES = {
-    0x1: "ecrecover",      # Returns 32 bytes
-    0x2: "sha256",         # Returns 32 bytes  
-    0x3: "ripemd160",      # Returns 32 bytes
-    0x4: "identity",       # Returns input data
-    0x5: "modexp",         # Returns variable length
-    0x6: "ecadd",          # Returns 64 bytes
-    0x7: "ecmul",          # Returns 64 bytes
-    0x8: "ecpairing",      # Returns 32 bytes
-    0x9: "blake2f",        # Returns 64 bytes
+    0x1: "ecrecover",  # Returns 32 bytes
+    0x2: "sha256",  # Returns 32 bytes
+    0x3: "ripemd160",  # Returns 32 bytes
+    0x4: "identity",  # Returns input data
+    0x5: "modexp",  # Returns variable length
+    0x6: "ecadd",  # Returns 64 bytes
+    0x7: "ecmul",  # Returns 64 bytes
+    0x8: "ecpairing",  # Returns 32 bytes
+    0x9: "blake2f",  # Returns 64 bytes
 }
 
 # Constants for fuzzing
@@ -44,57 +45,91 @@
 
 class MemoryFuzzer:
     """Generates random Venom IR with memory operations using IRBasicBlock API."""
-    
-    def __init__(self, seed_memory: bool = True, allow_params: bool = True):
-        self.seed_memory = seed_memory
-        self.allow_params = allow_params
+
+    def __init__(self):
         self.ctx = IRContext()
         self.function = None
         self.variable_counter = 0
         self.bb_counter = 0
+        self.calldata_offset = 0
         self.available_vars = []  # Variables available for use
-        
+        self.allocated_memory_slots = set()  # Track memory addresses that have been used
+
     def get_next_variable(self) -> IRVariable:
         """Generate a new unique variable."""
         self.variable_counter += 1
         var = IRVariable(f"v{self.variable_counter}")
         self.available_vars.append(var)
         return var
-    
+
+    def ensure_all_vars_have_values(self) -> None:
+        """Ensure all available variables have values by using calldataload for unassigned ones."""
+        # Find all variables that are outputs of instructions
+        assigned_vars = set()
+        for bb in self.function._basic_block_dict.values():
+            for inst in bb.instructions:
+                if inst.output:
+                    assigned_vars.add(inst.output)
+
+        # For variables that don't have values, add calldataload at the beginning
+        entry_bb = self.function.entry
+        unassigned_vars = [var for var in self.available_vars if var not in assigned_vars]
+
+        for i, var in enumerate(unassigned_vars):
+            # Insert calldataload at the beginning of the entry block
+            inst = IRInstruction("calldataload", [IRLiteral(self.calldata_offset)], var)
+            entry_bb.insert_instruction(inst, index=i)
+            self.calldata_offset += 32
+
     def get_next_bb_label(self) -> IRLabel:
-        """Generate a new unique basic block label.""" 
+        """Generate a new unique basic block label."""
         self.bb_counter += 1
         return IRLabel(f"bb{self.bb_counter}")
-    
+
     def get_random_variable(self, draw) -> IRVariable:
         """Get a random available variable or create a new one."""
         if self.available_vars and draw(st.booleans()):
             return draw(st.sampled_from(self.available_vars))
         else:
             return self.get_next_variable()
-    
+
     def get_memory_address(self, draw) -> IRVariable | IRLiteral:
-        """Get a memory address (either variable or aligned literal)."""
+        """Get a memory address, biased towards interesting optimizer-relevant locations."""
+        # 50% chance to use existing variable
         if self.available_vars and draw(st.booleans()):
             return draw(st.sampled_from(self.available_vars))
+
+        # Generate literal address
+        if self.allocated_memory_slots and draw(st.booleans()):
+            # Bias towards addresses near existing allocations
+            base_addr = draw(st.sampled_from(list(self.allocated_memory_slots)))
+
+            # Random offset biased towards edges (0 and 32 are most common)
+            offset = draw(st.integers(min_value=-32, max_value=32))
+            if draw(st.booleans()):  # 50% chance to snap to edge
+                offset = 0 if abs(offset) < 16 else (32 if offset > 0 else -32)
+
+            addr = max(0, min(MAX_MEMORY_SIZE - 32, base_addr + offset))
         else:
-            # Generate aligned memory addresses (multiples of 32)
-            addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32)) & ~31
-            return IRLiteral(addr)
+            # Random address anywhere in memory
+            addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32))
 
+        self.allocated_memory_slots.add(addr)
+        return IRLiteral(addr)
 
-@st.composite  
+
+@st.composite
 def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict:
     """Generate a complex control flow graph structure."""
     num_blocks = draw(st.integers(min_value=2, max_value=max_blocks))
-    
+
     # Create adjacency list representation
     # Block 0 is always the entry, highest numbered block is always the exit
     edges = {}
-    
+
     for i in range(num_blocks):
         edges[i] = []
-    
+
     # Ensure connectivity: each block (except exit) has at least one outgoing edge
     for i in range(num_blocks - 1):
         # Add at least one outgoing edge to ensure no dead blocks
@@ -102,10 +137,10 @@ def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict:
             # Second-to-last block must connect to exit
             edges[i].append(num_blocks - 1)
         else:
-            # Can connect to any later block 
+            # Can connect to any later block
             target = draw(st.integers(min_value=i + 1, max_value=num_blocks - 1))
             edges[i].append(target)
-    
+
     # Add some additional random edges for complexity
     for i in range(num_blocks - 1):
         # Chance to add more outgoing edges
@@ -120,7 +155,7 @@ def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict:
                     if possible_targets:
                         target = draw(st.sampled_from(possible_targets))
                         edges[i].append(target)
-    
+
     return {"num_blocks": num_blocks, "edges": edges}
 
 
@@ -129,39 +164,56 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate and append a memory instruction to current basic block."""
     op = draw(st.sampled_from(MEMORY_OPS))
     bb = fuzzer.current_bb
-    
+
     if op == "mload":
         # %result = mload %addr
         addr = fuzzer.get_memory_address(draw)
         result_var = bb.append_instruction("mload", addr)
-        
+
     elif op == "mstore":
         # mstore %value, %addr
-        value = fuzzer.get_random_variable(draw) if fuzzer.available_vars else IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1)))
+        # Random choice between variable and literal for value
+        if fuzzer.available_vars and draw(st.booleans()):
+            value = draw(st.sampled_from(fuzzer.available_vars))
+        else:
+            value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256 - 1)))
         addr = fuzzer.get_memory_address(draw)
         bb.append_instruction("mstore", value, addr)
-        
+
     elif op == "mcopy":
         # mcopy %dest, %src, %length
         dest = fuzzer.get_memory_address(draw)
         src = fuzzer.get_memory_address(draw)
-        length = IRLiteral(32)  # Copy 32 bytes
-        bb.append_instruction("mcopy", dest, src, length)
+
+        # Bias towards small lengths (more interesting for optimizers)
+        if draw(st.booleans()):
+            # Small lengths (1-96 bytes, biased towards 32-byte multiples)
+            if draw(st.booleans()):
+                length = draw(
+                    st.sampled_from([1, 2, 4, 8, 16, 20, 24, 28, 31, 32, 33, 36, 40, 48, 64, 96])
+                )
+            else:
+                length = draw(st.integers(min_value=1, max_value=96))
+        else:
+            # Larger lengths (up to 1KB)
+            length = draw(st.integers(min_value=97, max_value=1024))
+
+        bb.append_instruction("mcopy", dest, src, IRLiteral(length))
 
 
 @st.composite
 def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate a call to a precompile that produces real output data."""
     bb = fuzzer.current_bb
-    
+
     # Choose a precompile
     precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys())))
     precompile_name = PRECOMPILES[precompile_addr]
-    
+
     # Set up input data in memory
     input_offset = fuzzer.get_memory_address(draw)
     output_offset = fuzzer.get_memory_address(draw)
-    
+
     if precompile_name == "identity":
         # Identity precompile - copies input to output
         input_size = IRLiteral(32)
@@ -182,77 +234,57 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
         # Default case
         input_size = IRLiteral(32)
         output_size = IRLiteral(32)
-    
+
     # Call the precompile
-    gas = IRLiteral(100000)  # Plenty of gas
+    gas = bb.append_instruction("gas")  # Use all available gas
     addr = IRLiteral(precompile_addr)
     value = IRLiteral(0)
-    
-    result_var = bb.append_instruction("staticcall", gas, addr, input_offset, input_size, output_offset, output_size)
-
 
-@st.composite
-def seed_memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
-    """Generate an instruction that seeds memory with data."""
-    bb = fuzzer.current_bb
-    
-    if fuzzer.allow_params:
-        # Use calldataload to get "random" data from parameters
-        offset = IRLiteral(draw(st.integers(min_value=0, max_value=256, step=32)))
-        data_var = bb.append_instruction("calldataload", offset)
-        
-        # Store it in memory
-        mem_addr = fuzzer.get_memory_address(draw)
-        bb.append_instruction("mstore", data_var, mem_addr)
-    else:
-        # Just store a literal value
-        value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1)))
-        mem_addr = fuzzer.get_memory_address(draw)
-        bb.append_instruction("mstore", value, mem_addr)
+    result_var = bb.append_instruction(
+        "staticcall", gas, addr, input_offset, input_size, output_offset, output_size
+    )
 
 
 @st.composite
 def basic_block_instructions(draw, fuzzer: MemoryFuzzer, is_entry: bool = False) -> None:
     """Generate instructions for a basic block."""
-    
-    # For entry block, seed some memory first
-    if is_entry and fuzzer.seed_memory:
-        num_seeds = draw(st.integers(min_value=1, max_value=3))
-        for _ in range(num_seeds):
-            draw(seed_memory_instruction(fuzzer))
-    
+
+    # For entry block, seed memory first
+    if is_entry:
+        bb.append_instruction(
+            "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE)
+        )
+
     # Generate main instructions
     num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK))
-    
+
     for _ in range(num_instructions):
         # Choose instruction type
-        inst_type = draw(st.sampled_from(["memory", "precompile", "seed"]))
-        
+        inst_type = draw(st.sampled_from(["memory", "precompile"]))
+
         if inst_type == "memory":
             draw(memory_instruction(fuzzer))
-        elif inst_type == "precompile": 
+        elif inst_type == "precompile":
             draw(precompile_call(fuzzer))
-        elif inst_type == "seed":
-            draw(seed_memory_instruction(fuzzer))
 
 
 @st.composite
 def venom_function_with_memory_ops(draw) -> IRContext:
     """Generate a complete Venom IR function using IRBasicBlock API."""
-    
-    fuzzer = MemoryFuzzer(seed_memory=True, allow_params=True)
-    
+
+    fuzzer = MemoryFuzzer()
+
     # Create function
     func_name = IRLabel("_fuzz_function", is_symbol=True)
     fuzzer.function = IRFunction(func_name, fuzzer.ctx)
     fuzzer.ctx.functions[func_name] = fuzzer.function
     fuzzer.ctx.entry_function = fuzzer.function
-    
+
     # Generate control flow structure
     cfg = draw(control_flow_graph())
     num_blocks = cfg["num_blocks"]
     edges = cfg["edges"]
-    
+
     # Create all basic blocks first
     basic_blocks = []
     for i in range(num_blocks):
@@ -260,25 +292,25 @@ def venom_function_with_memory_ops(draw) -> IRContext:
             label = IRLabel("entry")
         else:
             label = fuzzer.get_next_bb_label()
-        
+
         bb = IRBasicBlock(label, fuzzer.function)
         fuzzer.function._basic_block_dict[label.value] = bb
         basic_blocks.append(bb)
-    
+
     # Set entry block
     fuzzer.function.entry = basic_blocks[0]
-    
+
     # Generate instructions for each block
     for i, bb in enumerate(basic_blocks):
         fuzzer.current_bb = bb
-        
+
         # Generate block content
-        is_entry = (i == 0)
+        is_entry = i == 0
         draw(basic_block_instructions(fuzzer, is_entry=is_entry))
-        
+
         # Add terminator instruction
         outgoing_edges = edges[i]
-        
+
         if i == num_blocks - 1:
             # Exit block - return memory contents
             bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0))
@@ -294,7 +326,7 @@ def venom_function_with_memory_ops(draw) -> IRContext:
             else:
                 # Load something from memory as condition
                 cond_var = bb.append_instruction("mload", IRLiteral(0))
-            
+
             target1_bb = basic_blocks[outgoing_edges[0]]
             target2_bb = basic_blocks[outgoing_edges[1]]
             bb.append_instruction("jnz", target1_bb.label, target2_bb.label, cond_var)
@@ -304,45 +336,49 @@ def venom_function_with_memory_ops(draw) -> IRContext:
                 selector_var = draw(st.sampled_from(fuzzer.available_vars))
             else:
                 selector_var = bb.append_instruction("mload", IRLiteral(0))
-            
+
             # Create jump table
             target_labels = [basic_blocks[edge].label for edge in outgoing_edges]
             bb.append_instruction("djmp", selector_var, *target_labels)
-    
+
+    # Ensure all variables have values before returning
+    fuzzer.ensure_all_vars_have_values()
+
     return fuzzer.ctx
 
 
 class MemoryFuzzChecker:
     """A pluggable checker for memory passes using fuzzing."""
-    
+
     def __init__(self, passes: List[type], post_passes: List[type] = None):
         self.passes = passes
         self.post_passes = post_passes or []
-    
+
     def check_memory_equivalence(self, ctx: IRContext) -> bool:
         """
         Check that memory passes preserve semantics by comparing execution.
-        
+
         Returns True if optimized and unoptimized versions are equivalent.
         """
         try:
             # Deep copy the context for optimization
             import copy
+
             unoptimized_ctx = copy.deepcopy(ctx)
             optimized_ctx = copy.deepcopy(ctx)
-            
+
             # Apply passes to optimized version
             for fn in optimized_ctx.functions.values():
                 ac = IRAnalysesCache(fn)
                 for pass_class in self.passes:
                     pass_obj = pass_class(ac, fn)
                     pass_obj.run_pass()
-                
+
                 # Apply post passes
                 for pass_class in self.post_passes:
                     pass_obj = pass_class(ac, fn)
                     pass_obj.run_pass()
-            
+
             # Use hevm to check equivalence if available
             try:
                 hevm_check_venom_ctx(unoptimized_ctx, optimized_ctx)
@@ -351,7 +387,7 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool:
                 # If hevm fails, we assume the optimization broke semantics
                 hp.note(f"HEVM equivalence check failed: {e}")
                 return False
-                
+
         except Exception as e:
             # If optimization fails, skip this test case
             hp.note(f"Optimization failed: {e}")
@@ -360,18 +396,32 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool:
 
 
 # Test with memory-related passes
-@pytest.mark.fuzzing  
-@pytest.mark.parametrize("pass_list", [
-    # Test individual memory passes
-    [__import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass],
-    [__import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass],
-    
-    # Test combinations  
+@pytest.mark.fuzzing
+@pytest.mark.parametrize(
+    "pass_list",
     [
-        __import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass,
-        __import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass,
+        # Test individual memory passes
+        [
+            __import__(
+                "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]
+            ).LoadEliminationPass
+        ],
+        [
+            __import__(
+                "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]
+            ).DeadStoreEliminationPass
+        ],
+        # Test combinations
+        [
+            __import__(
+                "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]
+            ).LoadEliminationPass,
+            __import__(
+                "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]
+            ).DeadStoreEliminationPass,
+        ],
     ],
-])
+)
 @hp.given(ctx=venom_function_with_memory_ops())
 @hp.settings(
     max_examples=100,
@@ -385,21 +435,21 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool:
 def test_memory_passes_fuzzing(pass_list, ctx):
     """
     Property-based test for memory optimization passes.
-    
+
     Tests that memory passes preserve semantics by comparing execution
     between optimized and unoptimized versions.
     """
     hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
-    
+
     # Log the generated IR for debugging
-    if hasattr(ctx, 'functions') and ctx.functions:
+    if hasattr(ctx, "functions") and ctx.functions:
         func = list(ctx.functions.values())[0]
         hp.note(f"Generated function with {len(func._basic_block_dict)} basic blocks")
         for bb_name, bb in func._basic_block_dict.items():
             hp.note(f"Block {bb_name}: {len(bb.instructions)} instructions")
-    
+
     checker = MemoryFuzzChecker(pass_list)
-    
+
     # The property we're testing: optimization should preserve semantics
     assert checker.check_memory_equivalence(ctx), "Memory optimization broke semantics"
 
@@ -408,8 +458,9 @@ def test_memory_passes_fuzzing(pass_list, ctx):
 def generate_sample_ir() -> IRContext:
     """Generate a sample IR for manual inspection."""
     import random
+
     random.seed(42)
-    
+
     # Create a hypothesis example
     ctx = venom_function_with_memory_ops().example()
     return ctx
@@ -418,17 +469,18 @@ def generate_sample_ir() -> IRContext:
 if __name__ == "__main__":
     # Example usage
     ctx = generate_sample_ir()
-    
+
     if ctx and ctx.functions:
         func = list(ctx.functions.values())[0]
         print(f"Generated function with {len(func._basic_block_dict)} basic blocks:")
         print(func)
-        
+
         # Test with a simple pass
         try:
             from vyper.venom.passes.load_elimination import LoadEliminationPass
+
             checker = MemoryFuzzChecker([LoadEliminationPass])
             result = checker.check_memory_equivalence(ctx)
             print(f"\nEquivalence check result: {result}")
         except ImportError:
-            print("Could not import LoadEliminationPass for testing")
\ No newline at end of file
+            print("Could not import LoadEliminationPass for testing")

From 2c5871b26134ffce3d33b590e9083d607a962439 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 15:11:30 +0200
Subject: [PATCH 03/24] remove dead code

---
 tests/functional/venom/test_memory_fuzzer.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index eda506eb3b..2982b41442 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -13,12 +13,10 @@
 import pytest
 
 from tests.hevm import hevm_check_venom_ctx
-from tests.venom_utils import PrePostChecker
 from vyper.venom.analysis import IRAnalysesCache
 from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
-from vyper.venom.passes.base_pass import IRPass
 
 # Memory operations that can be fuzzed
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
@@ -40,7 +38,6 @@
 MAX_MEMORY_SIZE = 4096  # Limit memory to 4096 bytes
 MAX_BASIC_BLOCKS = 8
 MAX_INSTRUCTIONS_PER_BLOCK = 8
-MAX_VARIABLES = 20
 
 
 class MemoryFuzzer:
@@ -86,13 +83,6 @@ def get_next_bb_label(self) -> IRLabel:
         self.bb_counter += 1
         return IRLabel(f"bb{self.bb_counter}")
 
-    def get_random_variable(self, draw) -> IRVariable:
-        """Get a random available variable or create a new one."""
-        if self.available_vars and draw(st.booleans()):
-            return draw(st.sampled_from(self.available_vars))
-        else:
-            return self.get_next_variable()
-
     def get_memory_address(self, draw) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
         # 50% chance to use existing variable

From fdea0553cea96b1e07867aecd28cf6f26e71a50a Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 18:12:09 +0200
Subject: [PATCH 04/24] add some cleanup

---
 tests/functional/venom/test_memory_fuzzer.py | 354 +++++++++++--------
 vyper/venom/context.py                       |  25 ++
 2 files changed, 239 insertions(+), 140 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 2982b41442..c06a4ad7c1 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -6,17 +6,17 @@
 can be plugged with any Venom passes.
 """
 
-from typing import List, Optional, Set
-
 import hypothesis as hp
 import hypothesis.strategies as st
 import pytest
 
-from tests.hevm import hevm_check_venom_ctx
 from vyper.venom.analysis import IRAnalysesCache
 from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
+from vyper.venom.passes.dead_store_elimination import DeadStoreEliminationPass
+from vyper.venom.passes.load_elimination import LoadEliminationPass
+from vyper.venom.passes.memmerging import MemMergingPass
 
 # Memory operations that can be fuzzed
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
@@ -35,9 +35,10 @@
 }
 
 # Constants for fuzzing
-MAX_MEMORY_SIZE = 4096  # Limit memory to 4096 bytes
+MAX_MEMORY_SIZE = 4096  # Limit for memory operations
 MAX_BASIC_BLOCKS = 8
 MAX_INSTRUCTIONS_PER_BLOCK = 8
+MAX_LOOP_ITERATIONS = 12  # Maximum iterations before forced loop exit
 
 
 class MemoryFuzzer:
@@ -48,7 +49,7 @@ def __init__(self):
         self.function = None
         self.variable_counter = 0
         self.bb_counter = 0
-        self.calldata_offset = 0
+        self.calldata_offset = MAX_MEMORY_SIZE
         self.available_vars = []  # Variables available for use
         self.allocated_memory_slots = set()  # Track memory addresses that have been used
 
@@ -63,7 +64,7 @@ def ensure_all_vars_have_values(self) -> None:
         """Ensure all available variables have values by using calldataload for unassigned ones."""
         # Find all variables that are outputs of instructions
         assigned_vars = set()
-        for bb in self.function._basic_block_dict.values():
+        for bb in self.function.get_basic_blocks():
             for inst in bb.instructions:
                 if inst.output:
                     assigned_vars.add(inst.output)
@@ -108,47 +109,6 @@ def get_memory_address(self, draw) -> IRVariable | IRLiteral:
         return IRLiteral(addr)
 
 
-@st.composite
-def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict:
-    """Generate a complex control flow graph structure."""
-    num_blocks = draw(st.integers(min_value=2, max_value=max_blocks))
-
-    # Create adjacency list representation
-    # Block 0 is always the entry, highest numbered block is always the exit
-    edges = {}
-
-    for i in range(num_blocks):
-        edges[i] = []
-
-    # Ensure connectivity: each block (except exit) has at least one outgoing edge
-    for i in range(num_blocks - 1):
-        # Add at least one outgoing edge to ensure no dead blocks
-        if i == num_blocks - 2:
-            # Second-to-last block must connect to exit
-            edges[i].append(num_blocks - 1)
-        else:
-            # Can connect to any later block
-            target = draw(st.integers(min_value=i + 1, max_value=num_blocks - 1))
-            edges[i].append(target)
-
-    # Add some additional random edges for complexity
-    for i in range(num_blocks - 1):
-        # Chance to add more outgoing edges
-        if draw(st.booleans()):
-            # Don't create too many edges
-            max_additional = min(2, num_blocks - i - 2)
-            if max_additional > 0:
-                num_additional = draw(st.integers(min_value=0, max_value=max_additional))
-                for _ in range(num_additional):
-                    # Choose a target we're not already connected to
-                    possible_targets = [j for j in range(i + 1, num_blocks) if j not in edges[i]]
-                    if possible_targets:
-                        target = draw(st.sampled_from(possible_targets))
-                        edges[i].append(target)
-
-    return {"num_blocks": num_blocks, "edges": edges}
-
-
 @st.composite
 def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate and append a memory instruction to current basic block."""
@@ -159,6 +119,7 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
         # %result = mload %addr
         addr = fuzzer.get_memory_address(draw)
         result_var = bb.append_instruction("mload", addr)
+        fuzzer.available_vars.append(result_var)
 
     elif op == "mstore":
         # mstore %value, %addr
@@ -191,6 +152,127 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
         bb.append_instruction("mcopy", dest, src, IRLiteral(length))
 
 
+@st.composite
+def control_flow_graph(draw, basic_blocks):
+    """
+    Generate a control flow graph that ensures:
+    1. All blocks are reachable from entry
+    2. No infinite loops (all loops terminate within 12 iterations)
+    3. Proper use of jump and branch instructions
+    """
+    if len(basic_blocks) == 1:
+        # Single block case - must return
+        return {basic_blocks[0]: {"type": "return"}}
+
+    cfg = {}
+    entry_block = basic_blocks[0]
+
+    # Create a spanning tree to ensure all blocks are reachable
+    remaining_blocks = basic_blocks[1:]
+    reachable_blocks = [entry_block]
+
+    # Build spanning tree connections
+    while remaining_blocks:
+        # Pick a random reachable block to connect from
+        source = draw(st.sampled_from(reachable_blocks))
+        # Pick a random unreachable block to connect to
+        target = draw(st.sampled_from(remaining_blocks))
+
+        # Add the target to reachable blocks
+        reachable_blocks.append(target)
+        remaining_blocks.remove(target)
+
+        # Decide if this connection should be a jump or branch
+        if draw(st.booleans()):
+            # Jump connection
+            cfg[source] = {"type": "jump", "target": target}
+        else:
+            # Branch connection - need two targets
+            other_target = draw(st.sampled_from(basic_blocks))
+            cfg[source] = {"type": "branch", "target1": target, "target2": other_target}
+
+    # Now add additional edges for more complex control flow
+    num_additional_edges = draw(st.integers(min_value=0, max_value=len(basic_blocks)))
+    loop_counter_addr = MAX_MEMORY_SIZE  # Start of reserved memory for metadata
+
+    for _ in range(num_additional_edges):
+        source = draw(st.sampled_from(basic_blocks))
+
+        # Skip if already has terminator
+        if source in cfg:
+            continue
+
+        edge_type = draw(st.sampled_from(["jump", "branch"]))
+
+        if edge_type == "jump":
+            target = draw(st.sampled_from(basic_blocks))
+
+            # Check if this creates a back edge (potential loop)
+            is_back_edge = basic_blocks.index(target) <= basic_blocks.index(source)
+
+            if is_back_edge:
+                # For back edges, use a branch with loop counter instead of unconditional jump
+                cfg[source] = {
+                    "type": "branch",
+                    "target1": target,
+                    "target2": draw(st.sampled_from(basic_blocks)),
+                    "is_back_edge": True,
+                    "counter_addr": loop_counter_addr,
+                }
+                loop_counter_addr += 32  # Next loop uses different memory location
+            else:
+                cfg[source] = {"type": "jump", "target": target}
+
+        else:  # branch
+            target1 = draw(st.sampled_from(basic_blocks))
+            target2 = draw(st.sampled_from(basic_blocks))
+
+            # Check if either target creates a back edge
+            is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(source)
+            is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(source)
+
+            cfg[source] = {
+                "type": "branch",
+                "target1": target1,
+                "target2": target2,
+                "is_back_edge": is_back_edge1 or is_back_edge2,
+                "counter_addr": loop_counter_addr if (is_back_edge1 or is_back_edge2) else None,
+            }
+
+            if is_back_edge1 or is_back_edge2:
+                loop_counter_addr += 32
+
+    # Ensure at least one block can return (avoid infinite execution)
+    blocks_without_terminators = [bb for bb in basic_blocks if bb not in cfg]
+    if blocks_without_terminators:
+        # Make some blocks return
+        num_returns = max(1, len(blocks_without_terminators) // 3)
+        return_blocks = draw(
+            st.lists(
+                st.sampled_from(blocks_without_terminators),
+                min_size=num_returns,
+                max_size=num_returns,
+                unique=True,
+            )
+        )
+        for bb in return_blocks:
+            cfg[bb] = {"type": "return"}
+
+        # Add random terminators to remaining blocks
+        remaining = [bb for bb in blocks_without_terminators if bb not in return_blocks]
+        for bb in remaining:
+            terminator_type = draw(st.sampled_from(["jump", "branch"]))
+            if terminator_type == "jump":
+                target = draw(st.sampled_from(basic_blocks))
+                cfg[bb] = {"type": "jump", "target": target}
+            else:
+                target1 = draw(st.sampled_from(basic_blocks))
+                target2 = draw(st.sampled_from(basic_blocks))
+                cfg[bb] = {"type": "branch", "target1": target1, "target2": target2}
+
+    return cfg
+
+
 @st.composite
 def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate a call to a precompile that produces real output data."""
@@ -228,23 +310,16 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
     # Call the precompile
     gas = bb.append_instruction("gas")  # Use all available gas
     addr = IRLiteral(precompile_addr)
-    value = IRLiteral(0)
 
-    result_var = bb.append_instruction(
+    bb.append_instruction(
         "staticcall", gas, addr, input_offset, input_size, output_offset, output_size
     )
 
 
 @st.composite
-def basic_block_instructions(draw, fuzzer: MemoryFuzzer, is_entry: bool = False) -> None:
+def basic_block_instructions(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate instructions for a basic block."""
 
-    # For entry block, seed memory first
-    if is_entry:
-        bb.append_instruction(
-            "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE)
-        )
-
     # Generate main instructions
     num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK))
 
@@ -270,13 +345,10 @@ def venom_function_with_memory_ops(draw) -> IRContext:
     fuzzer.ctx.functions[func_name] = fuzzer.function
     fuzzer.ctx.entry_function = fuzzer.function
 
-    # Generate control flow structure
-    cfg = draw(control_flow_graph())
-    num_blocks = cfg["num_blocks"]
-    edges = cfg["edges"]
-
-    # Create all basic blocks first
+    # Generate blocks
+    num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS))
     basic_blocks = []
+
     for i in range(num_blocks):
         if i == 0:
             label = IRLabel("entry")
@@ -284,52 +356,80 @@ def venom_function_with_memory_ops(draw) -> IRContext:
             label = fuzzer.get_next_bb_label()
 
         bb = IRBasicBlock(label, fuzzer.function)
-        fuzzer.function._basic_block_dict[label.value] = bb
+        fuzzer.function.append_basic_block(bb)
         basic_blocks.append(bb)
 
     # Set entry block
     fuzzer.function.entry = basic_blocks[0]
 
-    # Generate instructions for each block
+    # Create a control flow graph that ensures reachability and loop termination
+    cfg = draw(control_flow_graph(basic_blocks))
+
+    # Initialize memory and loop counters at function entry
+    entry_block = basic_blocks[0]
+    entry_block.append_instruction(
+        "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE)
+    )
+
+    # Extract used counter addresses from CFG and initialize them
+    used_counter_addrs = set()
+    for terminator_info in cfg.values():
+        if terminator_info.get("counter_addr") is not None:
+            addr = terminator_info["counter_addr"]
+            assert addr not in used_counter_addrs, f"Duplicate counter address {addr}"
+            used_counter_addrs.add(addr)
+
+    for addr in used_counter_addrs:
+        entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
+
+    # Generate content for each block
     for i, bb in enumerate(basic_blocks):
         fuzzer.current_bb = bb
 
         # Generate block content
-        is_entry = i == 0
-        draw(basic_block_instructions(fuzzer, is_entry=is_entry))
+        draw(basic_block_instructions(fuzzer))
 
-        # Add terminator instruction
-        outgoing_edges = edges[i]
-
-        if i == num_blocks - 1:
-            # Exit block - return memory contents
+        # Add terminators based on the control flow graph
+        terminator_info = cfg[bb]
+        if terminator_info["type"] == "return":
             bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0))
-        elif len(outgoing_edges) == 1:
-            # Single outgoing edge - unconditional jump
-            target_bb = basic_blocks[outgoing_edges[0]]
-            bb.append_instruction("jmp", target_bb.label)
-        elif len(outgoing_edges) == 2:
-            # Two outgoing edges - conditional jump
-            # Create condition based on memory contents or available variable
+        elif terminator_info["type"] == "jump":
+            target = terminator_info["target"]
+            bb.append_instruction("jmp", target.label)
+        elif terminator_info["type"] == "branch":
+            # Use existing variable or create condition
             if fuzzer.available_vars:
                 cond_var = draw(st.sampled_from(fuzzer.available_vars))
             else:
-                # Load something from memory as condition
                 cond_var = bb.append_instruction("mload", IRLiteral(0))
 
-            target1_bb = basic_blocks[outgoing_edges[0]]
-            target2_bb = basic_blocks[outgoing_edges[1]]
-            bb.append_instruction("jnz", target1_bb.label, target2_bb.label, cond_var)
-        else:
-            # Multiple edges - use djmp (dynamic jump table)
-            if fuzzer.available_vars:
-                selector_var = draw(st.sampled_from(fuzzer.available_vars))
+            # Add loop counter check if this is a back edge
+            if terminator_info.get("is_back_edge", False):
+                loop_counter_addr = terminator_info["counter_addr"]
+
+                # Load and increment counter
+                counter = bb.append_instruction("mload", IRLiteral(loop_counter_addr))
+                incremented = bb.append_instruction("add", counter, IRLiteral(1))
+                bb.append_instruction("mstore", incremented, IRLiteral(loop_counter_addr))
+
+                # Check if we should continue looping (counter < MAX_LOOP_ITERATIONS)
+                counter_lt_max = bb.append_instruction(
+                    "lt", incremented, IRLiteral(MAX_LOOP_ITERATIONS)
+                )
+
+                # Normalize original condition to 0 or 1
+                cond_normalized = bb.append_instruction("and", cond_var, IRLiteral(1))
+
+                # Continue loop only if: counter < MAX AND original condition is true
+                combined_cond = bb.append_instruction("and", counter_lt_max, cond_normalized)
+                cond_var = combined_cond
             else:
-                selector_var = bb.append_instruction("mload", IRLiteral(0))
+                # Non-loop branches: just normalize condition to 0 or 1
+                cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
 
-            # Create jump table
-            target_labels = [basic_blocks[edge].label for edge in outgoing_edges]
-            bb.append_instruction("djmp", selector_var, *target_labels)
+            target1 = terminator_info["target1"]
+            target2 = terminator_info["target2"]
+            bb.append_instruction("jnz", target1.label, target2.label, cond_var)
 
     # Ensure all variables have values before returning
     fuzzer.ensure_all_vars_have_values()
@@ -340,22 +440,20 @@ def venom_function_with_memory_ops(draw) -> IRContext:
 class MemoryFuzzChecker:
     """A pluggable checker for memory passes using fuzzing."""
 
-    def __init__(self, passes: List[type], post_passes: List[type] = None):
+    def __init__(self, passes: list[type], post_passes: list[type] = None):
         self.passes = passes
         self.post_passes = post_passes or []
 
     def check_memory_equivalence(self, ctx: IRContext) -> bool:
         """
-        Check that memory passes preserve semantics by comparing execution.
+        Check that memory passes preserve semantics.
 
-        Returns True if optimized and unoptimized versions are equivalent.
+        For now, this just verifies that the passes run without errors.
+        TODO: Implement actual semantic equivalence checking.
         """
         try:
-            # Deep copy the context for optimization
-            import copy
-
-            unoptimized_ctx = copy.deepcopy(ctx)
-            optimized_ctx = copy.deepcopy(ctx)
+            # Copy the context for optimization
+            optimized_ctx = ctx.copy()
 
             # Apply passes to optimized version
             for fn in optimized_ctx.functions.values():
@@ -369,19 +467,12 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool:
                     pass_obj = pass_class(ac, fn)
                     pass_obj.run_pass()
 
-            # Use hevm to check equivalence if available
-            try:
-                hevm_check_venom_ctx(unoptimized_ctx, optimized_ctx)
-                return True
-            except Exception as e:
-                # If hevm fails, we assume the optimization broke semantics
-                hp.note(f"HEVM equivalence check failed: {e}")
-                return False
+            # If we get here, the passes ran successfully
+            return True
 
         except Exception as e:
-            # If optimization fails, skip this test case
+            # If optimization fails, the pass has a bug
             hp.note(f"Optimization failed: {e}")
-            hp.assume(False)
             return False
 
 
@@ -391,25 +482,13 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool:
     "pass_list",
     [
         # Test individual memory passes
-        [
-            __import__(
-                "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]
-            ).LoadEliminationPass
-        ],
-        [
-            __import__(
-                "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]
-            ).DeadStoreEliminationPass
-        ],
+        [LoadEliminationPass],
+        [DeadStoreEliminationPass],
+        [MemMergingPass],
         # Test combinations
-        [
-            __import__(
-                "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]
-            ).LoadEliminationPass,
-            __import__(
-                "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]
-            ).DeadStoreEliminationPass,
-        ],
+        [LoadEliminationPass, DeadStoreEliminationPass],
+        [DeadStoreEliminationPass, LoadEliminationPass],
+        [LoadEliminationPass, MemMergingPass],
     ],
 )
 @hp.given(ctx=venom_function_with_memory_ops())
@@ -434,14 +513,14 @@ def test_memory_passes_fuzzing(pass_list, ctx):
     # Log the generated IR for debugging
     if hasattr(ctx, "functions") and ctx.functions:
         func = list(ctx.functions.values())[0]
-        hp.note(f"Generated function with {len(func._basic_block_dict)} basic blocks")
-        for bb_name, bb in func._basic_block_dict.items():
-            hp.note(f"Block {bb_name}: {len(bb.instructions)} instructions")
+        hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
+        for bb in func.get_basic_blocks():
+            hp.note(f"Block {bb.label.value}: {len(bb.instructions)} instructions")
 
     checker = MemoryFuzzChecker(pass_list)
 
-    # The property we're testing: optimization should preserve semantics
-    assert checker.check_memory_equivalence(ctx), "Memory optimization broke semantics"
+    # The property we're testing: optimization passes should not crash
+    assert checker.check_memory_equivalence(ctx), "Memory optimization pass crashed"
 
 
 # Utility function for manual testing
@@ -462,15 +541,10 @@ def generate_sample_ir() -> IRContext:
 
     if ctx and ctx.functions:
         func = list(ctx.functions.values())[0]
-        print(f"Generated function with {len(func._basic_block_dict)} basic blocks:")
+        print(f"Generated function with {func.num_basic_blocks} basic blocks:")
         print(func)
 
         # Test with a simple pass
-        try:
-            from vyper.venom.passes.load_elimination import LoadEliminationPass
-
-            checker = MemoryFuzzChecker([LoadEliminationPass])
-            result = checker.check_memory_equivalence(ctx)
-            print(f"\nEquivalence check result: {result}")
-        except ImportError:
-            print("Could not import LoadEliminationPass for testing")
+        checker = MemoryFuzzChecker([LoadEliminationPass])
+        result = checker.check_memory_equivalence(ctx)
+        print(f"\nEquivalence check result: {result}")
diff --git a/vyper/venom/context.py b/vyper/venom/context.py
index 30fac4875d..51c3420852 100644
--- a/vyper/venom/context.py
+++ b/vyper/venom/context.py
@@ -23,6 +23,12 @@ class DataSection:
     label: IRLabel
     data_items: list[DataItem] = field(default_factory=list)
 
+    def copy(self) -> "DataSection":
+        new_section = DataSection(self.label)
+        for item in self.data_items:
+            new_section.data_items.append(DataItem(item.data))
+        return new_section
+
     def __str__(self):
         ret = [f"dbsection {self.label.value}:"]
         for item in self.data_items:
@@ -99,6 +105,25 @@ def append_data_item(self, data: IRLabel | bytes) -> None:
         data_section = self.data_segment[-1]
         data_section.data_items.append(DataItem(data))
 
+    def copy(self) -> "IRContext":
+        new_ctx = IRContext()
+        new_ctx.ctor_mem_size = self.ctor_mem_size
+        new_ctx.immutables_len = self.immutables_len
+        new_ctx.last_label = self.last_label
+        new_ctx.last_variable = self.last_variable
+
+        for label, fn in self.functions.items():
+            new_fn = fn.copy()
+            new_ctx.add_function(new_fn)
+
+        if self.entry_function is not None:
+            new_ctx.entry_function = new_ctx.functions[self.entry_function.name]
+
+        for section in self.data_segment:
+            new_ctx.data_segment.append(section.copy())
+
+        return new_ctx
+
     def as_graph(self) -> str:
         s = ["digraph G {"]
         for fn in self.functions.values():

From 49ecb06806e245d856551d6768e636b03f86e3d3 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 18:15:28 +0200
Subject: [PATCH 05/24] lint

---
 tests/functional/venom/test_memory_fuzzer.py | 2 +-
 vyper/venom/context.py                       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index c06a4ad7c1..b32bb9ed94 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -383,7 +383,7 @@ def venom_function_with_memory_ops(draw) -> IRContext:
         entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
 
     # Generate content for each block
-    for i, bb in enumerate(basic_blocks):
+    for bb in basic_blocks:
         fuzzer.current_bb = bb
 
         # Generate block content
diff --git a/vyper/venom/context.py b/vyper/venom/context.py
index 51c3420852..e9be707fa7 100644
--- a/vyper/venom/context.py
+++ b/vyper/venom/context.py
@@ -112,7 +112,7 @@ def copy(self) -> "IRContext":
         new_ctx.last_label = self.last_label
         new_ctx.last_variable = self.last_variable
 
-        for label, fn in self.functions.items():
+        for fn in self.functions.values():
             new_fn = fn.copy()
             new_ctx.add_function(new_fn)
 

From 2791a82d2abda55dc1f351aad28c697b15fc0511 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 19:27:36 +0200
Subject: [PATCH 06/24] polishing, refactoring

---
 tests/functional/venom/test_memory_fuzzer.py | 380 +++++++++----------
 1 file changed, 179 insertions(+), 201 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index b32bb9ed94..1413b1b8d3 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -6,6 +6,9 @@
 can be plugged with any Venom passes.
 """
 
+from dataclasses import dataclass
+from typing import Optional
+
 import hypothesis as hp
 import hypothesis.strategies as st
 import pytest
@@ -18,27 +21,56 @@
 from vyper.venom.passes.load_elimination import LoadEliminationPass
 from vyper.venom.passes.memmerging import MemMergingPass
 
-# Memory operations that can be fuzzed
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
 
-# Precompile addresses for fence operations that generate real data
+# precompiles act as fence operations that generate real output data,
+# preventing optimizers from eliminating memory operations
 PRECOMPILES = {
-    0x1: "ecrecover",  # Returns 32 bytes
-    0x2: "sha256",  # Returns 32 bytes
-    0x3: "ripemd160",  # Returns 32 bytes
-    0x4: "identity",  # Returns input data
-    0x5: "modexp",  # Returns variable length
-    0x6: "ecadd",  # Returns 64 bytes
-    0x7: "ecmul",  # Returns 64 bytes
-    0x8: "ecpairing",  # Returns 32 bytes
-    0x9: "blake2f",  # Returns 64 bytes
+    0x1: "ecrecover",
+    0x2: "sha256",
+    0x3: "ripemd160",
+    0x4: "identity",
+    0x5: "modexp",
+    0x6: "ecadd",
+    0x7: "ecmul",
+    0x8: "ecpairing",
+    0x9: "blake2f",
 }
 
-# Constants for fuzzing
-MAX_MEMORY_SIZE = 4096  # Limit for memory operations
+MAX_MEMORY_SIZE = 4096
 MAX_BASIC_BLOCKS = 8
 MAX_INSTRUCTIONS_PER_BLOCK = 8
-MAX_LOOP_ITERATIONS = 12  # Maximum iterations before forced loop exit
+MAX_LOOP_ITERATIONS = 12
+
+
+@dataclass
+class _BBType:
+    """Base class for basic block types in the CFG."""
+    pass
+
+
+@dataclass
+class _ReturnBB(_BBType):
+    """Basic block that returns."""
+    pass
+
+
+@dataclass
+class _JumpBB(_BBType):
+    """Basic block with unconditional jump."""
+    target: IRBasicBlock
+
+
+@dataclass
+class _BranchBB(_BBType):
+    """Basic block with conditional branch."""
+    target1: IRBasicBlock
+    target2: IRBasicBlock
+    counter_addr: Optional[int] = None
+    
+    @property
+    def is_back_edge(self) -> bool:
+        return self.counter_addr is not None
 
 
 class MemoryFuzzer:
@@ -50,8 +82,8 @@ def __init__(self):
         self.variable_counter = 0
         self.bb_counter = 0
         self.calldata_offset = MAX_MEMORY_SIZE
-        self.available_vars = []  # Variables available for use
-        self.allocated_memory_slots = set()  # Track memory addresses that have been used
+        self.available_vars = []
+        self.allocated_memory_slots = set()
 
     def get_next_variable(self) -> IRVariable:
         """Generate a new unique variable."""
@@ -62,19 +94,16 @@ def get_next_variable(self) -> IRVariable:
 
     def ensure_all_vars_have_values(self) -> None:
         """Ensure all available variables have values by using calldataload for unassigned ones."""
-        # Find all variables that are outputs of instructions
         assigned_vars = set()
         for bb in self.function.get_basic_blocks():
             for inst in bb.instructions:
                 if inst.output:
                     assigned_vars.add(inst.output)
 
-        # For variables that don't have values, add calldataload at the beginning
         entry_bb = self.function.entry
         unassigned_vars = [var for var in self.available_vars if var not in assigned_vars]
 
         for i, var in enumerate(unassigned_vars):
-            # Insert calldataload at the beginning of the entry block
             inst = IRInstruction("calldataload", [IRLiteral(self.calldata_offset)], var)
             entry_bb.insert_instruction(inst, index=i)
             self.calldata_offset += 32
@@ -86,29 +115,41 @@ def get_next_bb_label(self) -> IRLabel:
 
     def get_memory_address(self, draw) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
-        # 50% chance to use existing variable
         if self.available_vars and draw(st.booleans()):
             return draw(st.sampled_from(self.available_vars))
 
-        # Generate literal address
         if self.allocated_memory_slots and draw(st.booleans()):
-            # Bias towards addresses near existing allocations
+            # bias towards addresses near existing allocations to create aliasing opportunities
             base_addr = draw(st.sampled_from(list(self.allocated_memory_slots)))
 
-            # Random offset biased towards edges (0 and 32 are most common)
             offset = draw(st.integers(min_value=-32, max_value=32))
-            if draw(st.booleans()):  # 50% chance to snap to edge
+            if draw(st.booleans()):
+                # snap to word boundaries for more interesting aliasing patterns
                 offset = 0 if abs(offset) < 16 else (32 if offset > 0 else -32)
 
             addr = max(0, min(MAX_MEMORY_SIZE - 32, base_addr + offset))
         else:
-            # Random address anywhere in memory
             addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32))
 
         self.allocated_memory_slots.add(addr)
         return IRLiteral(addr)
 
 
+@st.composite
+def copy_length(draw) -> int:
+    """Generate a length suitable for a copy operation."""
+    if draw(st.booleans()):
+        # small lengths are more interesting for optimizer edge cases
+        if draw(st.booleans()):
+            return draw(
+                st.sampled_from([1, 2, 4, 8, 16, 20, 24, 28, 31, 32, 33, 36, 40, 48, 64, 96])
+            )
+        else:
+            return draw(st.integers(min_value=1, max_value=96))
+    else:
+        return draw(st.integers(min_value=97, max_value=1024))
+
+
 @st.composite
 def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate and append a memory instruction to current basic block."""
@@ -116,14 +157,11 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
     bb = fuzzer.current_bb
 
     if op == "mload":
-        # %result = mload %addr
         addr = fuzzer.get_memory_address(draw)
         result_var = bb.append_instruction("mload", addr)
         fuzzer.available_vars.append(result_var)
 
     elif op == "mstore":
-        # mstore %value, %addr
-        # Random choice between variable and literal for value
         if fuzzer.available_vars and draw(st.booleans()):
             value = draw(st.sampled_from(fuzzer.available_vars))
         else:
@@ -132,25 +170,14 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
         bb.append_instruction("mstore", value, addr)
 
     elif op == "mcopy":
-        # mcopy %dest, %src, %length
         dest = fuzzer.get_memory_address(draw)
         src = fuzzer.get_memory_address(draw)
-
-        # Bias towards small lengths (more interesting for optimizers)
-        if draw(st.booleans()):
-            # Small lengths (1-96 bytes, biased towards 32-byte multiples)
-            if draw(st.booleans()):
-                length = draw(
-                    st.sampled_from([1, 2, 4, 8, 16, 20, 24, 28, 31, 32, 33, 36, 40, 48, 64, 96])
-                )
-            else:
-                length = draw(st.integers(min_value=1, max_value=96))
-        else:
-            # Larger lengths (up to 1KB)
-            length = draw(st.integers(min_value=97, max_value=1024))
-
+        length = draw(copy_length())
         bb.append_instruction("mcopy", dest, src, IRLiteral(length))
 
+    else:
+        raise ValueError("unreachable")
+
 
 @st.composite
 def control_flow_graph(draw, basic_blocks):
@@ -161,44 +188,35 @@ def control_flow_graph(draw, basic_blocks):
     3. Proper use of jump and branch instructions
     """
     if len(basic_blocks) == 1:
-        # Single block case - must return
-        return {basic_blocks[0]: {"type": "return"}}
+        return {basic_blocks[0]: _ReturnBB()}
 
-    cfg = {}
+    cfg: dict[IRBasicBlock, _BBType] = {}
     entry_block = basic_blocks[0]
 
-    # Create a spanning tree to ensure all blocks are reachable
+    # create a spanning tree to ensure all blocks are reachable
     remaining_blocks = basic_blocks[1:]
     reachable_blocks = [entry_block]
 
-    # Build spanning tree connections
     while remaining_blocks:
-        # Pick a random reachable block to connect from
         source = draw(st.sampled_from(reachable_blocks))
-        # Pick a random unreachable block to connect to
         target = draw(st.sampled_from(remaining_blocks))
 
-        # Add the target to reachable blocks
         reachable_blocks.append(target)
         remaining_blocks.remove(target)
 
-        # Decide if this connection should be a jump or branch
         if draw(st.booleans()):
-            # Jump connection
-            cfg[source] = {"type": "jump", "target": target}
+            cfg[source] = _JumpBB(target=target)
         else:
-            # Branch connection - need two targets
             other_target = draw(st.sampled_from(basic_blocks))
-            cfg[source] = {"type": "branch", "target1": target, "target2": other_target}
+            cfg[source] = _BranchBB(target1=target, target2=other_target)
 
-    # Now add additional edges for more complex control flow
+    # add additional edges for more complex control flow
     num_additional_edges = draw(st.integers(min_value=0, max_value=len(basic_blocks)))
-    loop_counter_addr = MAX_MEMORY_SIZE  # Start of reserved memory for metadata
+    loop_counter_addr = MAX_MEMORY_SIZE
 
     for _ in range(num_additional_edges):
         source = draw(st.sampled_from(basic_blocks))
 
-        # Skip if already has terminator
         if source in cfg:
             continue
 
@@ -207,68 +225,59 @@ def control_flow_graph(draw, basic_blocks):
         if edge_type == "jump":
             target = draw(st.sampled_from(basic_blocks))
 
-            # Check if this creates a back edge (potential loop)
             is_back_edge = basic_blocks.index(target) <= basic_blocks.index(source)
 
             if is_back_edge:
-                # For back edges, use a branch with loop counter instead of unconditional jump
-                cfg[source] = {
-                    "type": "branch",
-                    "target1": target,
-                    "target2": draw(st.sampled_from(basic_blocks)),
-                    "is_back_edge": True,
-                    "counter_addr": loop_counter_addr,
-                }
-                loop_counter_addr += 32  # Next loop uses different memory location
+                # back edges need loop counters to prevent infinite loops
+                cfg[source] = _BranchBB(
+                    target1=target,
+                    target2=draw(st.sampled_from(basic_blocks)),
+                    counter_addr=loop_counter_addr,
+                )
+                loop_counter_addr += 32
             else:
-                cfg[source] = {"type": "jump", "target": target}
+                cfg[source] = _JumpBB(target=target)
 
         else:  # branch
             target1 = draw(st.sampled_from(basic_blocks))
             target2 = draw(st.sampled_from(basic_blocks))
 
-            # Check if either target creates a back edge
             is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(source)
             is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(source)
 
-            cfg[source] = {
-                "type": "branch",
-                "target1": target1,
-                "target2": target2,
-                "is_back_edge": is_back_edge1 or is_back_edge2,
-                "counter_addr": loop_counter_addr if (is_back_edge1 or is_back_edge2) else None,
-            }
+            cfg[source] = _BranchBB(
+                target1=target1,
+                target2=target2,
+                counter_addr=loop_counter_addr if (is_back_edge1 or is_back_edge2) else None,
+            )
 
             if is_back_edge1 or is_back_edge2:
                 loop_counter_addr += 32
 
-    # Ensure at least one block can return (avoid infinite execution)
-    blocks_without_terminators = [bb for bb in basic_blocks if bb not in cfg]
-    if blocks_without_terminators:
-        # Make some blocks return
-        num_returns = max(1, len(blocks_without_terminators) // 3)
+    # ensure at least one block can return
+    remaining_blocks = [bb for bb in basic_blocks if bb not in cfg]
+    if remaining_blocks:
         return_blocks = draw(
             st.lists(
-                st.sampled_from(blocks_without_terminators),
-                min_size=num_returns,
-                max_size=num_returns,
+                st.sampled_from(remaining_blocks),
+                min_size=1,
+                max_size=len(remaining_blocks),
                 unique=True,
             )
         )
         for bb in return_blocks:
-            cfg[bb] = {"type": "return"}
-
-        # Add random terminators to remaining blocks
-        remaining = [bb for bb in blocks_without_terminators if bb not in return_blocks]
-        for bb in remaining:
-            terminator_type = draw(st.sampled_from(["jump", "branch"]))
-            if terminator_type == "jump":
-                target = draw(st.sampled_from(basic_blocks))
-                cfg[bb] = {"type": "jump", "target": target}
-            else:
-                target1 = draw(st.sampled_from(basic_blocks))
-                target2 = draw(st.sampled_from(basic_blocks))
-                cfg[bb] = {"type": "branch", "target1": target1, "target2": target2}
+            cfg[bb] = _ReturnBB()
+
+    remaining = [bb for bb in basic_blocks if bb not in cfg]
+    for bb in remaining:
+        terminator_type = draw(st.sampled_from(["jump", "branch"]))
+        if terminator_type == "jump":
+            target = draw(st.sampled_from(basic_blocks))
+            cfg[bb] = _JumpBB(target=target)
+        else:
+            target1 = draw(st.sampled_from(basic_blocks))
+            target2 = draw(st.sampled_from(basic_blocks))
+            cfg[bb] = _BranchBB(target1=target1, target2=target2)
 
     return cfg
 
@@ -278,74 +287,78 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate a call to a precompile that produces real output data."""
     bb = fuzzer.current_bb
 
-    # Choose a precompile
     precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys())))
     precompile_name = PRECOMPILES[precompile_addr]
 
-    # Set up input data in memory
-    input_offset = fuzzer.get_memory_address(draw)
-    output_offset = fuzzer.get_memory_address(draw)
+    input_ofst = fuzzer.get_memory_address(draw)
+    output_ofst = fuzzer.get_memory_address(draw)
 
-    if precompile_name == "identity":
-        # Identity precompile - copies input to output
-        input_size = IRLiteral(32)
+    if precompile_name == "ecrecover":
+        input_size = IRLiteral(128)  # v, r, s, hash
         output_size = IRLiteral(32)
     elif precompile_name == "sha256":
-        # SHA256 - takes any input, outputs 32 bytes
-        input_size = IRLiteral(64)  # Use 64 bytes input
+        input_size = IRLiteral(64)
         output_size = IRLiteral(32)
-    elif precompile_name == "blake2f":
-        # Blake2f - outputs 64 bytes
-        input_size = IRLiteral(213)  # Blake2f requires 213 bytes input
+    elif precompile_name == "ripemd160":
+        input_size = IRLiteral(64)
+        output_size = IRLiteral(32)
+    elif precompile_name == "identity":
+        # identity copies min(input_size, output_size) bytes
+        input_size = IRLiteral(draw(copy_length()))
+        output_size = IRLiteral(draw(copy_length()))
+    elif precompile_name == "modexp":
+        input_size = IRLiteral(96)  # minimal: base_len, exp_len, mod_len
+        output_size = IRLiteral(32)
+    elif precompile_name == "ecadd":
+        input_size = IRLiteral(128)  # two EC points (x1, y1, x2, y2)
         output_size = IRLiteral(64)
-    elif precompile_name in ["ecadd", "ecmul"]:
-        # EC operations - specific input/output sizes
-        input_size = IRLiteral(96)  # EC point operations
+    elif precompile_name == "ecmul":
+        input_size = IRLiteral(96)  # EC point (x, y) and scalar
         output_size = IRLiteral(64)
-    else:
-        # Default case
-        input_size = IRLiteral(32)
+    elif precompile_name == "ecpairing":
+        input_size = IRLiteral(192)  # minimal: one pair of G1 and G2 points
         output_size = IRLiteral(32)
+    elif precompile_name == "blake2f":
+        input_size = IRLiteral(213)  # blake2f requires specific input size
+        output_size = IRLiteral(64)
+    else:
+        # unreachable
+        raise Exception(f"Unknown precompile: {precompile_name}")
 
-    # Call the precompile
-    gas = bb.append_instruction("gas")  # Use all available gas
+    gas = bb.append_instruction("gas")
     addr = IRLiteral(precompile_addr)
 
     bb.append_instruction(
-        "staticcall", gas, addr, input_offset, input_size, output_offset, output_size
+        "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size
     )
 
 
 @st.composite
 def basic_block_instructions(draw, fuzzer: MemoryFuzzer) -> None:
     """Generate instructions for a basic block."""
-
-    # Generate main instructions
     num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK))
 
     for _ in range(num_instructions):
-        # Choose instruction type
         inst_type = draw(st.sampled_from(["memory", "precompile"]))
 
         if inst_type == "memory":
             draw(memory_instruction(fuzzer))
         elif inst_type == "precompile":
             draw(precompile_call(fuzzer))
+        else:
+            raise Exception("unreachable")
 
 
 @st.composite
 def venom_function_with_memory_ops(draw) -> IRContext:
     """Generate a complete Venom IR function using IRBasicBlock API."""
-
     fuzzer = MemoryFuzzer()
 
-    # Create function
     func_name = IRLabel("_fuzz_function", is_symbol=True)
     fuzzer.function = IRFunction(func_name, fuzzer.ctx)
     fuzzer.ctx.functions[func_name] = fuzzer.function
     fuzzer.ctx.entry_function = fuzzer.function
 
-    # Generate blocks
     num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS))
     basic_blocks = []
 
@@ -359,79 +372,66 @@ def venom_function_with_memory_ops(draw) -> IRContext:
         fuzzer.function.append_basic_block(bb)
         basic_blocks.append(bb)
 
-    # Set entry block
     fuzzer.function.entry = basic_blocks[0]
 
-    # Create a control flow graph that ensures reachability and loop termination
     cfg = draw(control_flow_graph(basic_blocks))
 
-    # Initialize memory and loop counters at function entry
     entry_block = basic_blocks[0]
     entry_block.append_instruction(
         "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE)
     )
 
-    # Extract used counter addresses from CFG and initialize them
+    # extract loop counter addresses and initialize them
     used_counter_addrs = set()
-    for terminator_info in cfg.values():
-        if terminator_info.get("counter_addr") is not None:
-            addr = terminator_info["counter_addr"]
+    for bb_type in cfg.values():
+        if isinstance(bb_type, _BranchBB) and bb_type.counter_addr is not None:
+            addr = bb_type.counter_addr
             assert addr not in used_counter_addrs, f"Duplicate counter address {addr}"
             used_counter_addrs.add(addr)
 
     for addr in used_counter_addrs:
         entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
 
-    # Generate content for each block
     for bb in basic_blocks:
         fuzzer.current_bb = bb
 
-        # Generate block content
         draw(basic_block_instructions(fuzzer))
 
-        # Add terminators based on the control flow graph
-        terminator_info = cfg[bb]
-        if terminator_info["type"] == "return":
+        bb_type = cfg[bb]
+        
+        if isinstance(bb_type, _ReturnBB):
             bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0))
-        elif terminator_info["type"] == "jump":
-            target = terminator_info["target"]
-            bb.append_instruction("jmp", target.label)
-        elif terminator_info["type"] == "branch":
-            # Use existing variable or create condition
+            
+        elif isinstance(bb_type, _JumpBB):
+            bb.append_instruction("jmp", bb_type.target.label)
+            
+        elif isinstance(bb_type, _BranchBB):
             if fuzzer.available_vars:
                 cond_var = draw(st.sampled_from(fuzzer.available_vars))
             else:
                 cond_var = bb.append_instruction("mload", IRLiteral(0))
 
-            # Add loop counter check if this is a back edge
-            if terminator_info.get("is_back_edge", False):
-                loop_counter_addr = terminator_info["counter_addr"]
+            cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
+
+            if bb_type.is_back_edge:
+                loop_counter_addr = bb_type.counter_addr
 
-                # Load and increment counter
                 counter = bb.append_instruction("mload", IRLiteral(loop_counter_addr))
-                incremented = bb.append_instruction("add", counter, IRLiteral(1))
-                bb.append_instruction("mstore", incremented, IRLiteral(loop_counter_addr))
+                counter = bb.append_instruction("add", counter, IRLiteral(1))
+                bb.append_instruction("mstore", counter, IRLiteral(loop_counter_addr))
 
-                # Check if we should continue looping (counter < MAX_LOOP_ITERATIONS)
+                # continue loop only if: counter < MAX_LOOP_ITERATIONS AND original condition
                 counter_lt_max = bb.append_instruction(
                     "lt", incremented, IRLiteral(MAX_LOOP_ITERATIONS)
                 )
 
-                # Normalize original condition to 0 or 1
-                cond_normalized = bb.append_instruction("and", cond_var, IRLiteral(1))
+                cond_var = bb.append_instruction("and", counter_lt_max, cond_var)
 
-                # Continue loop only if: counter < MAX AND original condition is true
-                combined_cond = bb.append_instruction("and", counter_lt_max, cond_normalized)
-                cond_var = combined_cond
-            else:
-                # Non-loop branches: just normalize condition to 0 or 1
-                cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
-
-            target1 = terminator_info["target1"]
-            target2 = terminator_info["target2"]
-            bb.append_instruction("jnz", target1.label, target2.label, cond_var)
+            bb.append_instruction("jnz", bb_type.target1.label, bb_type.target2.label, cond_var)
+            
+        else:
+            raise Exception() # unreachable
 
-    # Ensure all variables have values before returning
     fuzzer.ensure_all_vars_have_values()
 
     return fuzzer.ctx
@@ -444,36 +444,23 @@ def __init__(self, passes: list[type], post_passes: list[type] = None):
         self.passes = passes
         self.post_passes = post_passes or []
 
-    def check_memory_equivalence(self, ctx: IRContext) -> bool:
+    def run_passes(self, ctx: IRContext) -> None:
         """
-        Check that memory passes preserve semantics.
+        Run optimization passes on the IR context.
 
-        For now, this just verifies that the passes run without errors.
-        TODO: Implement actual semantic equivalence checking.
+        This method lets exceptions bubble up so Hypothesis can handle them properly.
         """
-        try:
-            # Copy the context for optimization
-            optimized_ctx = ctx.copy()
-
-            # Apply passes to optimized version
-            for fn in optimized_ctx.functions.values():
-                ac = IRAnalysesCache(fn)
-                for pass_class in self.passes:
-                    pass_obj = pass_class(ac, fn)
-                    pass_obj.run_pass()
+        optimized_ctx = ctx.copy()
 
-                # Apply post passes
-                for pass_class in self.post_passes:
-                    pass_obj = pass_class(ac, fn)
-                    pass_obj.run_pass()
+        for fn in optimized_ctx.functions.values():
+            ac = IRAnalysesCache(fn)
+            for pass_class in self.passes:
+                pass_obj = pass_class(ac, fn)
+                pass_obj.run_pass()
 
-            # If we get here, the passes ran successfully
-            return True
-
-        except Exception as e:
-            # If optimization fails, the pass has a bug
-            hp.note(f"Optimization failed: {e}")
-            return False
+            for pass_class in self.post_passes:
+                pass_obj = pass_class(ac, fn)
+                pass_obj.run_pass()
 
 
 # Test with memory-related passes
@@ -505,12 +492,10 @@ def test_memory_passes_fuzzing(pass_list, ctx):
     """
     Property-based test for memory optimization passes.
 
-    Tests that memory passes preserve semantics by comparing execution
-    between optimized and unoptimized versions.
+    Tests that memory passes do not crash on complex IR.
     """
     hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
 
-    # Log the generated IR for debugging
     if hasattr(ctx, "functions") and ctx.functions:
         func = list(ctx.functions.values())[0]
         hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
@@ -518,25 +503,19 @@ def test_memory_passes_fuzzing(pass_list, ctx):
             hp.note(f"Block {bb.label.value}: {len(bb.instructions)} instructions")
 
     checker = MemoryFuzzChecker(pass_list)
-
-    # The property we're testing: optimization passes should not crash
-    assert checker.check_memory_equivalence(ctx), "Memory optimization pass crashed"
+    checker.run_passes(ctx)
 
 
-# Utility function for manual testing
 def generate_sample_ir() -> IRContext:
     """Generate a sample IR for manual inspection."""
     import random
 
     random.seed(42)
-
-    # Create a hypothesis example
     ctx = venom_function_with_memory_ops().example()
     return ctx
 
 
 if __name__ == "__main__":
-    # Example usage
     ctx = generate_sample_ir()
 
     if ctx and ctx.functions:
@@ -544,7 +523,6 @@ def generate_sample_ir() -> IRContext:
         print(f"Generated function with {func.num_basic_blocks} basic blocks:")
         print(func)
 
-        # Test with a simple pass
         checker = MemoryFuzzChecker([LoadEliminationPass])
-        result = checker.check_memory_equivalence(ctx)
-        print(f"\nEquivalence check result: {result}")
+        checker.run_passes(ctx)
+        print("\nPasses completed successfully")

From 193c5d49e587e108d6f8b527451ae08f9c9c47ae Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 20:22:38 +0200
Subject: [PATCH 07/24] make things compile

---
 tests/functional/venom/test_memory_fuzzer.py | 210 ++++++++-----------
 1 file changed, 90 insertions(+), 120 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 1413b1b8d3..60c6c03c31 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -17,9 +17,7 @@
 from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
-from vyper.venom.passes.dead_store_elimination import DeadStoreEliminationPass
-from vyper.venom.passes.load_elimination import LoadEliminationPass
-from vyper.venom.passes.memmerging import MemMergingPass
+from vyper.venom.passes import DeadStoreElimination,LoadElimination, MemMergePass
 
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
 
@@ -46,30 +44,34 @@
 @dataclass
 class _BBType:
     """Base class for basic block types in the CFG."""
+
     pass
 
 
 @dataclass
 class _ReturnBB(_BBType):
     """Basic block that returns."""
+
     pass
 
 
 @dataclass
 class _JumpBB(_BBType):
     """Basic block with unconditional jump."""
+
     target: IRBasicBlock
 
 
 @dataclass
 class _BranchBB(_BBType):
     """Basic block with conditional branch."""
+
     target1: IRBasicBlock
     target2: IRBasicBlock
     counter_addr: Optional[int] = None
-    
+
     @property
-    def is_back_edge(self) -> bool:
+    def has_back_edge(self) -> bool:
         return self.counter_addr is not None
 
 
@@ -113,6 +115,13 @@ def get_next_bb_label(self) -> IRLabel:
         self.bb_counter += 1
         return IRLabel(f"bb{self.bb_counter}")
 
+    def get_random_variable(self, draw) -> IRVariable:
+        """Get a random available variable or create a new one."""
+        if self.available_vars and draw(st.booleans()):
+            return draw(st.sampled_from(self.available_vars))
+        else:
+            return self.get_next_variable()
+
     def get_memory_address(self, draw) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
         if self.available_vars and draw(st.booleans()):
@@ -151,10 +160,9 @@ def copy_length(draw) -> int:
 
 
 @st.composite
-def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None:
+def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
     """Generate and append a memory instruction to current basic block."""
     op = draw(st.sampled_from(MEMORY_OPS))
-    bb = fuzzer.current_bb
 
     if op == "mload":
         addr = fuzzer.get_memory_address(draw)
@@ -187,20 +195,25 @@ def control_flow_graph(draw, basic_blocks):
     2. No infinite loops (all loops terminate within 12 iterations)
     3. Proper use of jump and branch instructions
     """
-    if len(basic_blocks) == 1:
-        return {basic_blocks[0]: _ReturnBB()}
-
     cfg: dict[IRBasicBlock, _BBType] = {}
-    entry_block = basic_blocks[0]
+
+    # last block is always a return block - guarantees all other blocks have forward targets
+    cfg[basic_blocks[-1]] = _ReturnBB()
+
+    # cache forward targets for each block for performance
+    forward_targets = {}
+    for i, bb in enumerate(basic_blocks):
+        forward_targets[bb] = basic_blocks[i + 1 :]
 
     # create a spanning tree to ensure all blocks are reachable
-    remaining_blocks = basic_blocks[1:]
-    reachable_blocks = [entry_block]
+    remaining_blocks = basic_blocks[1:]  # exclude entry block
+    reachable_blocks = [basic_blocks[0]]
 
     while remaining_blocks:
         source = draw(st.sampled_from(reachable_blocks))
         target = draw(st.sampled_from(remaining_blocks))
 
+        # target is now reachable, but it may not be in cfg yet
         reachable_blocks.append(target)
         remaining_blocks.remove(target)
 
@@ -210,82 +223,45 @@ def control_flow_graph(draw, basic_blocks):
             other_target = draw(st.sampled_from(basic_blocks))
             cfg[source] = _BranchBB(target1=target, target2=other_target)
 
-    # add additional edges for more complex control flow
-    num_additional_edges = draw(st.integers(min_value=0, max_value=len(basic_blocks)))
+    # classify remaining blocks that were not handled during spanning
+    # tree construction.
     loop_counter_addr = MAX_MEMORY_SIZE
 
-    for _ in range(num_additional_edges):
-        source = draw(st.sampled_from(basic_blocks))
-
-        if source in cfg:
+    for bb in basic_blocks:
+        if bb in cfg:
             continue
 
         edge_type = draw(st.sampled_from(["jump", "branch"]))
 
         if edge_type == "jump":
-            target = draw(st.sampled_from(basic_blocks))
-
-            is_back_edge = basic_blocks.index(target) <= basic_blocks.index(source)
-
-            if is_back_edge:
-                # back edges need loop counters to prevent infinite loops
-                cfg[source] = _BranchBB(
-                    target1=target,
-                    target2=draw(st.sampled_from(basic_blocks)),
-                    counter_addr=loop_counter_addr,
-                )
-                loop_counter_addr += 32
-            else:
-                cfg[source] = _JumpBB(target=target)
-
+            target = draw(st.sampled_from(forward_targets[bb]))
+            cfg[bb] = _JumpBB(target=target)
         else:  # branch
             target1 = draw(st.sampled_from(basic_blocks))
             target2 = draw(st.sampled_from(basic_blocks))
 
-            is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(source)
-            is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(source)
+            is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(bb)
+            is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(bb)
 
-            cfg[source] = _BranchBB(
-                target1=target1,
-                target2=target2,
-                counter_addr=loop_counter_addr if (is_back_edge1 or is_back_edge2) else None,
-            )
+            if is_back_edge1 and is_back_edge2:
+                # ensure at least one target provides forward progress
+                target2 = draw(st.sampled_from(forward_targets[bb]))
+                is_back_edge2 = False
 
-            if is_back_edge1 or is_back_edge2:
-                loop_counter_addr += 32
+            contains_back_edge = is_back_edge1 or is_back_edge2
+            counter_addr = loop_counter_addr if contains_back_edge else None
 
-    # ensure at least one block can return
-    remaining_blocks = [bb for bb in basic_blocks if bb not in cfg]
-    if remaining_blocks:
-        return_blocks = draw(
-            st.lists(
-                st.sampled_from(remaining_blocks),
-                min_size=1,
-                max_size=len(remaining_blocks),
-                unique=True,
-            )
-        )
-        for bb in return_blocks:
-            cfg[bb] = _ReturnBB()
-
-    remaining = [bb for bb in basic_blocks if bb not in cfg]
-    for bb in remaining:
-        terminator_type = draw(st.sampled_from(["jump", "branch"]))
-        if terminator_type == "jump":
-            target = draw(st.sampled_from(basic_blocks))
-            cfg[bb] = _JumpBB(target=target)
-        else:
-            target1 = draw(st.sampled_from(basic_blocks))
-            target2 = draw(st.sampled_from(basic_blocks))
-            cfg[bb] = _BranchBB(target1=target1, target2=target2)
+            cfg[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr)
+
+            if contains_back_edge:
+                loop_counter_addr += 32
 
     return cfg
 
 
 @st.composite
-def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
+def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
     """Generate a call to a precompile that produces real output data."""
-    bb = fuzzer.current_bb
 
     precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys())))
     precompile_name = PRECOMPILES[precompile_addr]
@@ -328,23 +304,21 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None:
     gas = bb.append_instruction("gas")
     addr = IRLiteral(precompile_addr)
 
-    bb.append_instruction(
-        "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size
-    )
+    bb.append_instruction("staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size)
 
 
 @st.composite
-def basic_block_instructions(draw, fuzzer: MemoryFuzzer) -> None:
+def basic_block_instructions(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
     """Generate instructions for a basic block."""
     num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK))
 
     for _ in range(num_instructions):
-        inst_type = draw(st.sampled_from(["memory", "precompile"]))
+        inst_type = draw(st.sampled_from(["memory"] * 9 + ["precompile"]))
 
         if inst_type == "memory":
-            draw(memory_instruction(fuzzer))
+            draw(memory_instruction(fuzzer, bb))
         elif inst_type == "precompile":
-            draw(precompile_call(fuzzer))
+            draw(precompile_call(fuzzer, bb))
         else:
             raise Exception("unreachable")
 
@@ -362,6 +336,9 @@ def venom_function_with_memory_ops(draw) -> IRContext:
     num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS))
     basic_blocks = []
 
+    # clear default entry block
+    fuzzer.function.clear_basic_blocks()
+
     for i in range(num_blocks):
         if i == 0:
             label = IRLabel("entry")
@@ -372,7 +349,7 @@ def venom_function_with_memory_ops(draw) -> IRContext:
         fuzzer.function.append_basic_block(bb)
         basic_blocks.append(bb)
 
-    fuzzer.function.entry = basic_blocks[0]
+    assert fuzzer.function.entry is basic_blocks[0]
 
     cfg = draw(control_flow_graph(basic_blocks))
 
@@ -382,55 +359,52 @@ def venom_function_with_memory_ops(draw) -> IRContext:
     )
 
     # extract loop counter addresses and initialize them
-    used_counter_addrs = set()
+    counter_addrs = set()
     for bb_type in cfg.values():
         if isinstance(bb_type, _BranchBB) and bb_type.counter_addr is not None:
             addr = bb_type.counter_addr
-            assert addr not in used_counter_addrs, f"Duplicate counter address {addr}"
-            used_counter_addrs.add(addr)
+            assert addr not in counter_addrs, f"Duplicate counter address {addr}"
+            counter_addrs.add(addr)
 
-    for addr in used_counter_addrs:
+    for addr in counter_addrs:
         entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
 
     for bb in basic_blocks:
-        fuzzer.current_bb = bb
-
-        draw(basic_block_instructions(fuzzer))
+        draw(basic_block_instructions(fuzzer, bb))
 
         bb_type = cfg[bb]
-        
+
         if isinstance(bb_type, _ReturnBB):
             bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0))
-            
+
         elif isinstance(bb_type, _JumpBB):
             bb.append_instruction("jmp", bb_type.target.label)
-            
-        elif isinstance(bb_type, _BranchBB):
-            if fuzzer.available_vars:
-                cond_var = draw(st.sampled_from(fuzzer.available_vars))
-            else:
-                cond_var = bb.append_instruction("mload", IRLiteral(0))
 
+        elif isinstance(bb_type, _BranchBB):
+            cond_var = fuzzer.get_random_variable(draw)
+            # get bottom bit, for bias reasons
             cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
 
-            if bb_type.is_back_edge:
-                loop_counter_addr = bb_type.counter_addr
+            if bb_type.has_back_edge:
+                loop_counter_addr = IRLiteral(bb_type.counter_addr)
 
-                counter = bb.append_instruction("mload", IRLiteral(loop_counter_addr))
-                counter = bb.append_instruction("add", counter, IRLiteral(1))
-                bb.append_instruction("mstore", counter, IRLiteral(loop_counter_addr))
+                counter = bb.append_instruction("mload", loop_counter_addr)
+                incr_counter = bb.append_instruction("add", counter, IRLiteral(1))
+                bb.append_instruction("mstore", incr_counter, loop_counter_addr)
 
-                # continue loop only if: counter < MAX_LOOP_ITERATIONS AND original condition
-                counter_lt_max = bb.append_instruction(
-                    "lt", incremented, IRLiteral(MAX_LOOP_ITERATIONS)
-                )
+                # exit loop when counter >= MAX_LOOP_ITERATIONS
+                # (note we are guaranteed that second target provides forward
+                # progress)
+                max_iterations = IRLiteral(MAX_LOOP_ITERATIONS)
+                # counter < iterbound
+                counter_ok = bb.append_instruction("lt", counter, max_iterations)
 
-                cond_var = bb.append_instruction("and", counter_lt_max, cond_var)
+                cond_var = bb.append_instruction("and", counter_ok, cond_var)
 
             bb.append_instruction("jnz", bb_type.target1.label, bb_type.target2.label, cond_var)
-            
+
         else:
-            raise Exception() # unreachable
+            raise Exception()  # unreachable
 
     fuzzer.ensure_all_vars_have_values()
 
@@ -469,13 +443,13 @@ def run_passes(self, ctx: IRContext) -> None:
     "pass_list",
     [
         # Test individual memory passes
-        [LoadEliminationPass],
-        [DeadStoreEliminationPass],
-        [MemMergingPass],
+        [LoadElimination],
+        [DeadStoreElimination],
+        [MemMergePass],
         # Test combinations
-        [LoadEliminationPass, DeadStoreEliminationPass],
-        [DeadStoreEliminationPass, LoadEliminationPass],
-        [LoadEliminationPass, MemMergingPass],
+        [LoadElimination, DeadStoreElimination],
+        [DeadStoreElimination, LoadElimination],
+        [LoadElimination, MemMergePass],
     ],
 )
 @hp.given(ctx=venom_function_with_memory_ops())
@@ -508,9 +482,6 @@ def test_memory_passes_fuzzing(pass_list, ctx):
 
 def generate_sample_ir() -> IRContext:
     """Generate a sample IR for manual inspection."""
-    import random
-
-    random.seed(42)
     ctx = venom_function_with_memory_ops().example()
     return ctx
 
@@ -518,11 +489,10 @@ def generate_sample_ir() -> IRContext:
 if __name__ == "__main__":
     ctx = generate_sample_ir()
 
-    if ctx and ctx.functions:
-        func = list(ctx.functions.values())[0]
-        print(f"Generated function with {func.num_basic_blocks} basic blocks:")
-        print(func)
+    func = list(ctx.functions.values())[0]
+    print(f"Generated function with {func.num_basic_blocks} basic blocks:")
+    print(func)
 
-        checker = MemoryFuzzChecker([LoadEliminationPass])
-        checker.run_passes(ctx)
-        print("\nPasses completed successfully")
+    checker = MemoryFuzzChecker([MemMergePass])
+    checker.run_passes(ctx)
+    print(ctx)

From d42ed649d985583280864103a4b17836ffde3a9f Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 20:43:22 +0200
Subject: [PATCH 08/24] add evm harness

---
 tests/functional/venom/test_memory_fuzzer.py | 96 +++++++++++++++++---
 1 file changed, 82 insertions(+), 14 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 60c6c03c31..e0bf070c27 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -13,11 +13,14 @@
 import hypothesis.strategies as st
 import pytest
 
+from tests.evm_backends.base_env import ExecutionReverted
+from vyper.ir.compile_ir import assembly_to_evm
+from vyper.venom import SingleUseExpansion, VenomCompiler
 from vyper.venom.analysis import IRAnalysesCache
 from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
-from vyper.venom.passes import DeadStoreElimination,LoadElimination, MemMergePass
+from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass
 
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
 
@@ -36,8 +39,8 @@
 }
 
 MAX_MEMORY_SIZE = 4096
-MAX_BASIC_BLOCKS = 8
-MAX_INSTRUCTIONS_PER_BLOCK = 8
+MAX_BASIC_BLOCKS = 50
+MAX_INSTRUCTIONS_PER_BLOCK = 50
 MAX_LOOP_ITERATIONS = 12
 
 
@@ -324,8 +327,14 @@ def basic_block_instructions(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> No
 
 
 @st.composite
-def venom_function_with_memory_ops(draw) -> IRContext:
-    """Generate a complete Venom IR function using IRBasicBlock API."""
+def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
+    """Generate a complete Venom IR function using IRBasicBlock API.
+
+    Returns:
+        tuple[IRContext, int]: The generated IR context and the required calldata size.
+        The calldata size includes both the initial memory seed (MAX_MEMORY_SIZE bytes)
+        and any additional calldata needed for unassigned variables.
+    """
     fuzzer = MemoryFuzzer()
 
     func_name = IRLabel("_fuzz_function", is_symbol=True)
@@ -408,7 +417,7 @@ def venom_function_with_memory_ops(draw) -> IRContext:
 
     fuzzer.ensure_all_vars_have_values()
 
-    return fuzzer.ctx
+    return fuzzer.ctx, fuzzer.calldata_offset
 
 
 class MemoryFuzzChecker:
@@ -418,16 +427,33 @@ def __init__(self, passes: list[type], post_passes: list[type] = None):
         self.passes = passes
         self.post_passes = post_passes or []
 
-    def run_passes(self, ctx: IRContext) -> None:
+    def compile_to_bytecode(self, ctx: IRContext) -> bytes:
+        """Compile Venom IR context to EVM bytecode."""
+        # Need SingleUseExpansion for venom_to_assembly
+        for fn in ctx.functions.values():
+            ac = IRAnalysesCache(fn)
+            SingleUseExpansion(ac, fn).run_pass()
+
+        # Compile to assembly and then to bytecode
+        compiler = VenomCompiler([ctx])
+        asm = compiler.generate_evm(no_optimize=False)
+        bytecode, _ = assembly_to_evm(asm)
+        return bytecode
+
+    def run_passes(self, ctx: IRContext) -> IRContext:
         """
         Run optimization passes on the IR context.
 
-        This method lets exceptions bubble up so Hypothesis can handle them properly.
+        Returns the optimized context.
         """
         optimized_ctx = ctx.copy()
 
         for fn in optimized_ctx.functions.values():
             ac = IRAnalysesCache(fn)
+
+            # Convert to SSA form first if needed by the passes
+            MakeSSA(ac, fn).run_pass()
+
             for pass_class in self.passes:
                 pass_obj = pass_class(ac, fn)
                 pass_obj.run_pass()
@@ -436,6 +462,47 @@ def run_passes(self, ctx: IRContext) -> None:
                 pass_obj = pass_class(ac, fn)
                 pass_obj.run_pass()
 
+        return optimized_ctx
+
+    def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, bytes]:
+        """Execute bytecode with given calldata and return success status and output."""
+        deployed_address = env._deploy(bytecode, value=0)
+
+        try:
+            result = env.message_call(to=deployed_address, data=calldata, value=0)
+            return True, result
+        except ExecutionReverted as e:
+            # return revert data if available
+            return False, e.args[0] if e.args else b""
+        except Exception:
+            # other errors like out of gas
+            return False, b""
+
+    def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:
+        """Check equivalence between unoptimized and optimized execution."""
+        unopt_bytecode = self.compile_to_bytecode(ctx)
+
+        opt_ctx = self.run_passes(ctx)
+        opt_bytecode = self.compile_to_bytecode(opt_ctx)
+
+        unopt_success, unopt_output = self.execute_bytecode(unopt_bytecode, calldata, env)
+        opt_success, opt_output = self.execute_bytecode(opt_bytecode, calldata, env)
+
+        assert (
+            unopt_success == opt_success
+        ), f"Execution success mismatch: unopt={unopt_success}, opt={opt_success}"
+        assert (
+            unopt_output == opt_output
+        ), f"Output mismatch: unopt={unopt_output.hex()}, opt={opt_output.hex()}"
+
+
+@st.composite
+def venom_with_calldata(draw):
+    """Generate Venom IR context with matching calldata."""
+    ctx, calldata_size = draw(venom_function_with_memory_ops())
+    calldata = draw(st.binary(min_size=calldata_size, max_size=calldata_size))
+    return ctx, calldata
+
 
 # Test with memory-related passes
 @pytest.mark.fuzzing
@@ -452,7 +519,7 @@ def run_passes(self, ctx: IRContext) -> None:
         [LoadElimination, MemMergePass],
     ],
 )
-@hp.given(ctx=venom_function_with_memory_ops())
+@hp.given(venom_with_calldata())
 @hp.settings(
     max_examples=100,
     suppress_health_check=(
@@ -462,22 +529,23 @@ def run_passes(self, ctx: IRContext) -> None:
     ),
     deadline=None,
 )
-def test_memory_passes_fuzzing(pass_list, ctx):
+def test_memory_passes_fuzzing(pass_list, venom_data, env):
     """
     Property-based test for memory optimization passes.
 
-    Tests that memory passes do not crash on complex IR.
+    Tests that memory passes preserve semantics by comparing EVM execution results.
     """
+    ctx, calldata = venom_data
+
     hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
 
     if hasattr(ctx, "functions") and ctx.functions:
         func = list(ctx.functions.values())[0]
         hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
-        for bb in func.get_basic_blocks():
-            hp.note(f"Block {bb.label.value}: {len(bb.instructions)} instructions")
+        hp.note(f"Calldata size: {len(calldata)} bytes")
 
     checker = MemoryFuzzChecker(pass_list)
-    checker.run_passes(ctx)
+    checker.check_equivalence(ctx, calldata, env)
 
 
 def generate_sample_ir() -> IRContext:

From 50feb1cdcde263b8aa0ac80cdd900e1668db028c Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 20:52:54 +0200
Subject: [PATCH 09/24] cleanup, add proper deploy code

---
 tests/functional/venom/test_memory_fuzzer.py | 52 ++++++++++----------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index e0bf070c27..3d5448082d 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -429,7 +429,7 @@ def __init__(self, passes: list[type], post_passes: list[type] = None):
 
     def compile_to_bytecode(self, ctx: IRContext) -> bytes:
         """Compile Venom IR context to EVM bytecode."""
-        # Need SingleUseExpansion for venom_to_assembly
+        # assumes MakeSSA has already been run
         for fn in ctx.functions.values():
             ac = IRAnalysesCache(fn)
             SingleUseExpansion(ac, fn).run_pass()
@@ -442,7 +442,7 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes:
 
     def run_passes(self, ctx: IRContext) -> IRContext:
         """
-        Run optimization passes on the IR context.
+        Copies the IRContext and runs optimization passes on the copy of the IR context.
 
         Returns the optimized context.
         """
@@ -451,9 +451,6 @@ def run_passes(self, ctx: IRContext) -> IRContext:
         for fn in optimized_ctx.functions.values():
             ac = IRAnalysesCache(fn)
 
-            # Convert to SSA form first if needed by the passes
-            MakeSSA(ac, fn).run_pass()
-
             for pass_class in self.passes:
                 pass_obj = pass_class(ac, fn)
                 pass_obj.run_pass()
@@ -466,34 +463,38 @@ def run_passes(self, ctx: IRContext) -> IRContext:
 
     def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, bytes]:
         """Execute bytecode with given calldata and return success status and output."""
-        deployed_address = env._deploy(bytecode, value=0)
+        # wrap runtime bytecode in deploy bytecode that returns it
+        bytecode_len = len(bytecode)
+        bytecode_len_hex = hex(bytecode_len)[2:].rjust(4, "0")
+        # deploy preamble: PUSH2 len, 0, DUP2, PUSH1 0a, 0, CODECOPY, RETURN
+        deploy_preamble = bytes.fromhex("61" + bytecode_len_hex + "3d81600a3d39f3")
+        deploy_bytecode = deploy_preamble + bytecode
+
+        deployed_address = env._deploy(deploy_bytecode)
 
         try:
-            result = env.message_call(to=deployed_address, data=calldata, value=0)
+            result = env.message_call(to=deployed_address, data=calldata)
             return True, result
-        except ExecutionReverted as e:
-            # return revert data if available
-            return False, e.args[0] if e.args else b""
-        except Exception:
-            # other errors like out of gas
+        except EvmError as e:
             return False, b""
 
     def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:
         """Check equivalence between unoptimized and optimized execution."""
-        unopt_bytecode = self.compile_to_bytecode(ctx)
+        # run MakeSSA on the original context first
+        for fn in ctx.functions.values():
+            ac = IRAnalysesCache(fn)
+            MakeSSA(ac, fn).run_pass()
 
         opt_ctx = self.run_passes(ctx)
-        opt_bytecode = self.compile_to_bytecode(opt_ctx)
 
-        unopt_success, unopt_output = self.execute_bytecode(unopt_bytecode, calldata, env)
-        opt_success, opt_output = self.execute_bytecode(opt_bytecode, calldata, env)
+        bytecode1 = self.compile_to_bytecode(ctx)
+        bytecode2 = self.compile_to_bytecode(opt_ctx)
 
-        assert (
-            unopt_success == opt_success
-        ), f"Execution success mismatch: unopt={unopt_success}, opt={opt_success}"
-        assert (
-            unopt_output == opt_output
-        ), f"Output mismatch: unopt={unopt_output.hex()}, opt={opt_output.hex()}"
+        succ1, out1 = self.execute_bytecode(bytecode1, calldata, env)
+        succ2, out2 = self.execute_bytecode(bytecode2, calldata, env)
+
+        assert succ1 == succ2, (succ1, out1, succ2, out2)
+        assert out1 == out2, (succ1, out1, succ2, out2)
 
 
 @st.composite
@@ -539,10 +540,9 @@ def test_memory_passes_fuzzing(pass_list, venom_data, env):
 
     hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
 
-    if hasattr(ctx, "functions") and ctx.functions:
-        func = list(ctx.functions.values())[0]
-        hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
-        hp.note(f"Calldata size: {len(calldata)} bytes")
+    func = list(ctx.functions.values())[0]
+    hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
+    hp.note(f"Calldata size: {len(calldata)} bytes")
 
     checker = MemoryFuzzChecker(pass_list)
     checker.check_equivalence(ctx, calldata, env)

From 554d34e0ca69138a62f79a63dc40ef8de36dc0c7 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 20:53:56 +0200
Subject: [PATCH 10/24] lint

---
 tests/functional/venom/test_memory_fuzzer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 3d5448082d..a38640ece1 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -13,7 +13,7 @@
 import hypothesis.strategies as st
 import pytest
 
-from tests.evm_backends.base_env import ExecutionReverted
+from tests.evm_backends.base_env import EvmError
 from vyper.ir.compile_ir import assembly_to_evm
 from vyper.venom import SingleUseExpansion, VenomCompiler
 from vyper.venom.analysis import IRAnalysesCache
@@ -475,7 +475,7 @@ def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool,
         try:
             result = env.message_call(to=deployed_address, data=calldata)
             return True, result
-        except EvmError as e:
+        except EvmError:
             return False, b""
 
     def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:

From 3c700c56e23769a16c53749f2e8b256f351c19f3 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 21:55:40 +0200
Subject: [PATCH 11/24] debug, analysis for available vars

---
 tests/evm_backends/base_env.py               |   4 +-
 tests/evm_backends/pyevm_env.py              |   2 +-
 tests/evm_backends/revm_env.py               |   2 +-
 tests/functional/venom/test_memory_fuzzer.py | 175 +++++++++++++------
 vyper/venom/basicblock.py                    |   6 +-
 vyper/venom/function.py                      |   1 +
 6 files changed, 136 insertions(+), 54 deletions(-)

diff --git a/tests/evm_backends/base_env.py b/tests/evm_backends/base_env.py
index 77a0182664..0dd2e6dc97 100644
--- a/tests/evm_backends/base_env.py
+++ b/tests/evm_backends/base_env.py
@@ -61,7 +61,7 @@ def deploy(self, abi: list[dict], bytecode: bytes, value=0, *args, **kwargs):
             ctor = ABIFunction(ctor_abi, contract_name=factory._name)
             initcode += ctor.prepare_calldata(*args, **kwargs)
 
-        deployed_at = self._deploy(initcode, value)
+        deployed_at = self._deploy(initcode, value=value)
         address = to_checksum_address(deployed_at)
         return factory.at(self, address)
 
@@ -181,7 +181,7 @@ def get_excess_blob_gas(self) -> Optional[int]:
     def set_excess_blob_gas(self, param):
         raise NotImplementedError  # must be implemented by subclasses
 
-    def _deploy(self, code: bytes, value: int, gas: int | None = None) -> str:
+    def _deploy(self, code: bytes, value: int = 0, gas: int | None = None) -> str:
         raise NotImplementedError  # must be implemented by subclasses
 
     @staticmethod
diff --git a/tests/evm_backends/pyevm_env.py b/tests/evm_backends/pyevm_env.py
index 6c510278a7..7e91780392 100644
--- a/tests/evm_backends/pyevm_env.py
+++ b/tests/evm_backends/pyevm_env.py
@@ -189,7 +189,7 @@ def get_excess_blob_gas(self) -> Optional[int]:
     def set_excess_blob_gas(self, param):
         self._context._excess_blob_gas = param
 
-    def _deploy(self, code: bytes, value: int, gas: int = None) -> str:
+    def _deploy(self, code: bytes, value: int = 0, gas: int = None) -> str:
         sender = _addr(self.deployer)
         target_address = self._generate_contract_address(sender)
 
diff --git a/tests/evm_backends/revm_env.py b/tests/evm_backends/revm_env.py
index d5a7570f96..1c3643a591 100644
--- a/tests/evm_backends/revm_env.py
+++ b/tests/evm_backends/revm_env.py
@@ -135,7 +135,7 @@ def get_blob_gasprice(self) -> Optional[int]:
     def set_excess_blob_gas(self, value):
         self._evm.env.block.excess_blob_gas = value
 
-    def _deploy(self, code: bytes, value: int, gas: int = None) -> str:
+    def _deploy(self, code: bytes, value: int = 0, gas: int = None) -> str:
         try:
             return self._evm.deploy(self.deployer, code, value, gas)
         except RuntimeError as e:
diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index a38640ece1..a68e1d0f7f 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -15,12 +15,12 @@
 
 from tests.evm_backends.base_env import EvmError
 from vyper.ir.compile_ir import assembly_to_evm
-from vyper.venom import SingleUseExpansion, VenomCompiler
+from vyper.venom import VenomCompiler
 from vyper.venom.analysis import IRAnalysesCache
 from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
-from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass
+from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass, AssignElimination, SingleUseExpansion, SimplifyCFGPass
 
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
 
@@ -38,7 +38,7 @@
     0x9: "blake2f",
 }
 
-MAX_MEMORY_SIZE = 4096
+MAX_MEMORY_SIZE = 256
 MAX_BASIC_BLOCKS = 50
 MAX_INSTRUCTIONS_PER_BLOCK = 50
 MAX_LOOP_ITERATIONS = 12
@@ -89,6 +89,8 @@ def __init__(self):
         self.calldata_offset = MAX_MEMORY_SIZE
         self.available_vars = []
         self.allocated_memory_slots = set()
+        # track which variables are available in each block
+        self.bb_available_vars = {}
 
     def get_next_variable(self) -> IRVariable:
         """Generate a new unique variable."""
@@ -118,17 +120,19 @@ def get_next_bb_label(self) -> IRLabel:
         self.bb_counter += 1
         return IRLabel(f"bb{self.bb_counter}")
 
-    def get_random_variable(self, draw) -> IRVariable:
+    def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable:
         """Get a random available variable or create a new one."""
-        if self.available_vars and draw(st.booleans()):
-            return draw(st.sampled_from(self.available_vars))
+        available_in_bb = self.bb_available_vars.get(bb, [])
+        if available_in_bb and draw(st.booleans()):
+            return draw(st.sampled_from(available_in_bb))
         else:
             return self.get_next_variable()
 
-    def get_memory_address(self, draw) -> IRVariable | IRLiteral:
+    def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
-        if self.available_vars and draw(st.booleans()):
-            return draw(st.sampled_from(self.available_vars))
+        available_in_bb = self.bb_available_vars.get(bb, [])
+        if available_in_bb and draw(st.booleans()):
+            return draw(st.sampled_from(available_in_bb))
 
         if self.allocated_memory_slots and draw(st.booleans()):
             # bias towards addresses near existing allocations to create aliasing opportunities
@@ -167,22 +171,30 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
     """Generate and append a memory instruction to current basic block."""
     op = draw(st.sampled_from(MEMORY_OPS))
 
+    # track variables defined so far in this block
+    if bb not in fuzzer.bb_available_vars:
+        fuzzer.bb_available_vars[bb] = []
+
     if op == "mload":
-        addr = fuzzer.get_memory_address(draw)
+        addr = fuzzer.get_memory_address(draw, bb)
         result_var = bb.append_instruction("mload", addr)
         fuzzer.available_vars.append(result_var)
+        # add to variables available in this block
+        fuzzer.bb_available_vars[bb].append(result_var)
 
     elif op == "mstore":
-        if fuzzer.available_vars and draw(st.booleans()):
-            value = draw(st.sampled_from(fuzzer.available_vars))
+        # can use variables defined earlier in this block
+        available_in_bb = fuzzer.bb_available_vars.get(bb, [])
+        if available_in_bb and draw(st.booleans()):
+            value = draw(st.sampled_from(available_in_bb))
         else:
             value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256 - 1)))
-        addr = fuzzer.get_memory_address(draw)
+        addr = fuzzer.get_memory_address(draw, bb)
         bb.append_instruction("mstore", value, addr)
 
     elif op == "mcopy":
-        dest = fuzzer.get_memory_address(draw)
-        src = fuzzer.get_memory_address(draw)
+        dest = fuzzer.get_memory_address(draw, bb)
+        src = fuzzer.get_memory_address(draw, bb)
         length = draw(copy_length())
         bb.append_instruction("mcopy", dest, src, IRLiteral(length))
 
@@ -214,6 +226,11 @@ def control_flow_graph(draw, basic_blocks):
 
     while remaining_blocks:
         source = draw(st.sampled_from(reachable_blocks))
+
+        # we have already visited it
+        if source in cfg:
+            continue
+
         target = draw(st.sampled_from(remaining_blocks))
 
         # target is now reachable, but it may not be in cfg yet
@@ -269,8 +286,8 @@ def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
     precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys())))
     precompile_name = PRECOMPILES[precompile_addr]
 
-    input_ofst = fuzzer.get_memory_address(draw)
-    output_ofst = fuzzer.get_memory_address(draw)
+    input_ofst = fuzzer.get_memory_address(draw, bb)
+    output_ofst = fuzzer.get_memory_address(draw, bb)
 
     if precompile_name == "ecrecover":
         input_size = IRLiteral(128)  # v, r, s, hash
@@ -378,9 +395,63 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
     for addr in counter_addrs:
         entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
 
+    # first pass: generate instructions for each block
     for bb in basic_blocks:
         draw(basic_block_instructions(fuzzer, bb))
-
+    
+    # compute available variables at each block based on CFG
+    # a variable is available if it's defined in ALL paths to that block
+    bb_incoming_available = {}
+    
+    # collect variables defined in each block (already in fuzzer.bb_available_vars)
+    # and initialize incoming available sets
+    for bb in basic_blocks:
+        bb_incoming_available[bb] = set()
+    
+    # propagate available variables through CFG
+    # entry block starts with empty set
+    bb_incoming_available[basic_blocks[0]] = set()
+    
+    # iteratively propagate until fixpoint
+    changed = True
+    while changed:
+        changed = False
+        for bb in basic_blocks:
+            # find predecessors
+            preds = []
+            for pred_bb in basic_blocks:
+                pred_type = cfg[pred_bb]
+                if isinstance(pred_type, _JumpBB) and pred_type.target == bb:
+                    preds.append(pred_bb)
+                elif isinstance(pred_type, _BranchBB) and (pred_type.target1 == bb or pred_type.target2 == bb):
+                    preds.append(pred_bb)
+            
+            if preds:
+                # available vars = intersection of all predecessors' available + defined vars
+                new_available = None
+                for pred in preds:
+                    # variables available at end of predecessor = incoming + defined in pred
+                    pred_defined = set(fuzzer.bb_available_vars.get(pred, []))
+                    pred_avail = bb_incoming_available[pred] | pred_defined
+                    
+                    if new_available is None:
+                        new_available = pred_avail
+                    else:
+                        new_available = new_available & pred_avail
+                
+                if new_available != bb_incoming_available[bb]:
+                    bb_incoming_available[bb] = new_available
+                    changed = True
+    
+    # update fuzzer's bb_available_vars to include incoming variables
+    for bb in basic_blocks:
+        incoming = list(bb_incoming_available[bb])
+        existing = fuzzer.bb_available_vars.get(bb, [])
+        # incoming vars are available at the start, then vars defined in the block
+        fuzzer.bb_available_vars[bb] = incoming + existing
+    
+    # second pass: add terminators using available variables
+    for bb in basic_blocks:
         bb_type = cfg[bb]
 
         if isinstance(bb_type, _ReturnBB):
@@ -390,7 +461,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
             bb.append_instruction("jmp", bb_type.target.label)
 
         elif isinstance(bb_type, _BranchBB):
-            cond_var = fuzzer.get_random_variable(draw)
+            cond_var = fuzzer.get_random_variable(draw, bb)
             # get bottom bit, for bias reasons
             cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
 
@@ -410,7 +481,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
 
                 cond_var = bb.append_instruction("and", counter_ok, cond_var)
 
-            bb.append_instruction("jnz", bb_type.target1.label, bb_type.target2.label, cond_var)
+            bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label)
 
         else:
             raise Exception()  # unreachable
@@ -423,20 +494,19 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
 class MemoryFuzzChecker:
     """A pluggable checker for memory passes using fuzzing."""
 
-    def __init__(self, passes: list[type], post_passes: list[type] = None):
+    def __init__(self, passes: list[type]):
         self.passes = passes
-        self.post_passes = post_passes or []
 
     def compile_to_bytecode(self, ctx: IRContext) -> bytes:
         """Compile Venom IR context to EVM bytecode."""
         # assumes MakeSSA has already been run
         for fn in ctx.functions.values():
             ac = IRAnalysesCache(fn)
+            SimplifyCFGPass(ac, fn).run_pass()
             SingleUseExpansion(ac, fn).run_pass()
 
-        # Compile to assembly and then to bytecode
         compiler = VenomCompiler([ctx])
-        asm = compiler.generate_evm(no_optimize=False)
+        asm = compiler.generate_evm()
         bytecode, _ = assembly_to_evm(asm)
         return bytecode
 
@@ -455,10 +525,6 @@ def run_passes(self, ctx: IRContext) -> IRContext:
                 pass_obj = pass_class(ac, fn)
                 pass_obj.run_pass()
 
-            for pass_class in self.post_passes:
-                pass_obj = pass_class(ac, fn)
-                pass_obj.run_pass()
-
         return optimized_ctx
 
     def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, bytes]:
@@ -484,6 +550,7 @@ def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:
         for fn in ctx.functions.values():
             ac = IRAnalysesCache(fn)
             MakeSSA(ac, fn).run_pass()
+            AssignElimination(ac, fn).run_pass()
 
         opt_ctx = self.run_passes(ctx)
 
@@ -507,35 +574,43 @@ def venom_with_calldata(draw):
 
 # Test with memory-related passes
 @pytest.mark.fuzzing
-@pytest.mark.parametrize(
-    "pass_list",
-    [
-        # Test individual memory passes
-        [LoadElimination],
-        [DeadStoreElimination],
-        [MemMergePass],
-        # Test combinations
-        [LoadElimination, DeadStoreElimination],
-        [DeadStoreElimination, LoadElimination],
-        [LoadElimination, MemMergePass],
-    ],
-)
-@hp.given(venom_with_calldata())
+#@pytest.mark.parametrize(
+#    "pass_list",
+#    [
+#        # Test individual memory passes
+#        [MemMergePass],
+#        [LoadElimination],
+#        [DeadStoreElimination],
+#        # Test combinations
+#        [LoadElimination, DeadStoreElimination],
+#        [DeadStoreElimination, LoadElimination],
+#        [LoadElimination, MemMergePass],
+#    ],
+#)
+@hp.given(venom_data=venom_with_calldata())
+
 @hp.settings(
-    max_examples=100,
+    max_examples=1000,
     suppress_health_check=(
         hp.HealthCheck.data_too_large,
         hp.HealthCheck.too_slow,
-        hp.HealthCheck.filter_too_much,
     ),
     deadline=None,
+    phases=(     
+        hp.Phase.explicit,
+        hp.Phase.reuse,
+        hp.Phase.generate,
+        hp.Phase.target,
+        # Phase.shrink,  # can force long waiting for examples                                                         
+    ),           
 )
-def test_memory_passes_fuzzing(pass_list, venom_data, env):
+def test_memory_passes_fuzzing(venom_data, env):
     """
     Property-based test for memory optimization passes.
 
     Tests that memory passes preserve semantics by comparing EVM execution results.
     """
+    pass_list = [MemMergePass]
     ctx, calldata = venom_data
 
     hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
@@ -543,6 +618,7 @@ def test_memory_passes_fuzzing(pass_list, venom_data, env):
     func = list(ctx.functions.values())[0]
     hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
     hp.note(f"Calldata size: {len(calldata)} bytes")
+    hp.note(str(ctx))
 
     checker = MemoryFuzzChecker(pass_list)
     checker.check_equivalence(ctx, calldata, env)
@@ -550,17 +626,18 @@ def test_memory_passes_fuzzing(pass_list, venom_data, env):
 
 def generate_sample_ir() -> IRContext:
     """Generate a sample IR for manual inspection."""
-    ctx = venom_function_with_memory_ops().example()
+    ctx, _ = venom_function_with_memory_ops().example()
     return ctx
 
 
 if __name__ == "__main__":
     ctx = generate_sample_ir()
 
-    func = list(ctx.functions.values())[0]
-    print(f"Generated function with {func.num_basic_blocks} basic blocks:")
-    print(func)
+    #func = list(ctx.functions.values())[0]
+    #print(func)
 
     checker = MemoryFuzzChecker([MemMergePass])
     checker.run_passes(ctx)
     print(ctx)
+    bytecode = checker.compile_to_bytecode(ctx)
+    print(bytecode.hex())
diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py
index 27d1e2c7fd..4a2d2f84b4 100644
--- a/vyper/venom/basicblock.py
+++ b/vyper/venom/basicblock.py
@@ -668,7 +668,11 @@ def copy(self) -> IRBasicBlock:
     def __repr__(self) -> str:
         printer = ir_printer.get()
 
-        s = f"{repr(self.label)}:  ; OUT={[bb.label for bb in self.out_bbs]}\n"
+        s = f"{repr(self.label)}:"
+        if self.is_terminated:
+            s += f"  ; OUT={[bb.label for bb in self.out_bbs]}"
+        s += "\n"
+
         if printer and hasattr(printer, "_pre_block"):
             s += printer._pre_block(self)
         for inst in self.instructions:
diff --git a/vyper/venom/function.py b/vyper/venom/function.py
index 3ad63b207a..d9dc2e07b2 100644
--- a/vyper/venom/function.py
+++ b/vyper/venom/function.py
@@ -161,6 +161,7 @@ def error_msg(self) -> Optional[str]:
 
     def copy(self):
         new = IRFunction(self.name)
+        new.clear_basic_blocks()
         for bb in self.get_basic_blocks():
             new_bb = bb.copy()
             new.append_basic_block(new_bb)

From 1b52d0a795a258922f684ca16cf03a6a276186dd Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 22:34:47 +0200
Subject: [PATCH 12/24] fix weakref bug

---
 vyper/venom/function.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vyper/venom/function.py b/vyper/venom/function.py
index d9dc2e07b2..7fb483baa0 100644
--- a/vyper/venom/function.py
+++ b/vyper/venom/function.py
@@ -66,6 +66,7 @@ def append_basic_block(self, bb: IRBasicBlock):
         assert isinstance(bb, IRBasicBlock), bb
         assert bb.label.name not in self._basic_block_dict, bb.label
         self._basic_block_dict[bb.label.name] = bb
+        bb.parent = self  # ensure parent is updated
 
     def remove_basic_block(self, bb: IRBasicBlock):
         assert isinstance(bb, IRBasicBlock), bb

From 0ff2b9b43992e50ab18ef5ec7f1b360c71b40462 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 23:12:44 +0200
Subject: [PATCH 13/24] fix assumption in venom_to_assembly: entry block has no
 predecessors

---
 vyper/venom/venom_to_assembly.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 4c5a2bfcda..f4b28a3bce 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -342,6 +342,8 @@ def _generate_evm_for_basicblock_r(
 
         fn = basicblock.parent
         if basicblock == fn.entry:
+            # Entry block should not have predecessors (no back edges to entry)
+            assert len(self.cfg.cfg_in(basicblock)) == 0
             self._prepare_stack_for_function(asm, fn, stack)
 
         if len(self.cfg.cfg_in(basicblock)) == 1:

From a67d12d10c0e878e3990669d1fd4cfa669aee699 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 23:13:10 +0200
Subject: [PATCH 14/24] allocate variables after the fact, rather than finding
 available

variables
---
 tests/functional/venom/test_memory_fuzzer.py | 117 +++++++++----------
 1 file changed, 57 insertions(+), 60 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index a68e1d0f7f..d070d01e81 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -78,6 +78,11 @@ def has_back_edge(self) -> bool:
         return self.counter_addr is not None
 
 
+class SymbolicVar(IRVariable):
+    """Placeholder for a variable that will be resolved later"""
+    pass
+
+
 class MemoryFuzzer:
     """Generates random Venom IR with memory operations using IRBasicBlock API."""
 
@@ -91,6 +96,9 @@ def __init__(self):
         self.allocated_memory_slots = set()
         # track which variables are available in each block
         self.bb_available_vars = {}
+        # symbolic variable tracking
+        self.symbolic_counter = 0
+        self.symbolic_mapping = {}  # SymbolicVar -> IRVariable
 
     def get_next_variable(self) -> IRVariable:
         """Generate a new unique variable."""
@@ -98,6 +106,35 @@ def get_next_variable(self) -> IRVariable:
         var = IRVariable(f"v{self.variable_counter}")
         self.available_vars.append(var)
         return var
+    
+    def fresh_symbolic(self) -> SymbolicVar:
+        """Create a new symbolic variable"""
+        self.symbolic_counter += 1
+        return SymbolicVar(f"%sym_{self.symbolic_counter}")
+    
+    def resolve_all_variables(self):
+        """After building all blocks, replace symbolic vars with real ones"""
+        # Map all symbolic vars to real variables
+        for bb in self.function.get_basic_blocks():
+            for inst in bb.instructions:
+                # Handle output
+                if inst.output and isinstance(inst.output, SymbolicVar):
+                    if inst.output not in self.symbolic_mapping:
+                        self.symbolic_mapping[inst.output] = self.get_next_variable()
+                    inst.output = self.symbolic_mapping[inst.output]
+                
+                # Handle inputs
+                new_operands = []
+                for op in inst.operands:
+                    if isinstance(op, SymbolicVar):
+                        if op not in self.symbolic_mapping:
+                            # This symbolic var was never defined as output
+                            # Create a fresh variable for it
+                            self.symbolic_mapping[op] = self.get_next_variable()
+                        new_operands.append(self.symbolic_mapping[op])
+                    else:
+                        new_operands.append(op)
+                inst.operands = new_operands
 
     def ensure_all_vars_have_values(self) -> None:
         """Ensure all available variables have values by using calldataload for unassigned ones."""
@@ -120,13 +157,13 @@ def get_next_bb_label(self) -> IRLabel:
         self.bb_counter += 1
         return IRLabel(f"bb{self.bb_counter}")
 
-    def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable:
+    def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable | SymbolicVar:
         """Get a random available variable or create a new one."""
         available_in_bb = self.bb_available_vars.get(bb, [])
         if available_in_bb and draw(st.booleans()):
             return draw(st.sampled_from(available_in_bb))
         else:
-            return self.get_next_variable()
+            return self.fresh_symbolic()
 
     def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
@@ -177,8 +214,8 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
 
     if op == "mload":
         addr = fuzzer.get_memory_address(draw, bb)
-        result_var = bb.append_instruction("mload", addr)
-        fuzzer.available_vars.append(result_var)
+        result_var = fuzzer.fresh_symbolic()
+        bb.append_instruction("mload", addr, ret=result_var)
         # add to variables available in this block
         fuzzer.bb_available_vars[bb].append(result_var)
 
@@ -202,6 +239,7 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
         raise ValueError("unreachable")
 
 
+
 @st.composite
 def control_flow_graph(draw, basic_blocks):
     """
@@ -209,6 +247,7 @@ def control_flow_graph(draw, basic_blocks):
     1. All blocks are reachable from entry
     2. No infinite loops (all loops terminate within 12 iterations)
     3. Proper use of jump and branch instructions
+    4. No back edges to entry block
     """
     cfg: dict[IRBasicBlock, _BBType] = {}
 
@@ -219,6 +258,9 @@ def control_flow_graph(draw, basic_blocks):
     forward_targets = {}
     for i, bb in enumerate(basic_blocks):
         forward_targets[bb] = basic_blocks[i + 1 :]
+    
+    # All blocks except entry (to prevent back edges to entry)
+    non_entry_blocks = basic_blocks[1:]
 
     # create a spanning tree to ensure all blocks are reachable
     remaining_blocks = basic_blocks[1:]  # exclude entry block
@@ -240,7 +282,9 @@ def control_flow_graph(draw, basic_blocks):
         if draw(st.booleans()):
             cfg[source] = _JumpBB(target=target)
         else:
-            other_target = draw(st.sampled_from(basic_blocks))
+            # For branches, allow any block as the other target except entry
+            # (target is already guaranteed to be forward)
+            other_target = draw(st.sampled_from(non_entry_blocks))
             cfg[source] = _BranchBB(target1=target, target2=other_target)
 
     # classify remaining blocks that were not handled during spanning
@@ -257,8 +301,9 @@ def control_flow_graph(draw, basic_blocks):
             target = draw(st.sampled_from(forward_targets[bb]))
             cfg[bb] = _JumpBB(target=target)
         else:  # branch
-            target1 = draw(st.sampled_from(basic_blocks))
-            target2 = draw(st.sampled_from(basic_blocks))
+            # Choose targets, but never allow entry as a target
+            target1 = draw(st.sampled_from(non_entry_blocks))
+            target2 = draw(st.sampled_from(non_entry_blocks))
 
             is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(bb)
             is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(bb)
@@ -395,62 +440,11 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
     for addr in counter_addrs:
         entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
 
-    # first pass: generate instructions for each block
+    # generate instructions for each block
     for bb in basic_blocks:
         draw(basic_block_instructions(fuzzer, bb))
     
-    # compute available variables at each block based on CFG
-    # a variable is available if it's defined in ALL paths to that block
-    bb_incoming_available = {}
-    
-    # collect variables defined in each block (already in fuzzer.bb_available_vars)
-    # and initialize incoming available sets
-    for bb in basic_blocks:
-        bb_incoming_available[bb] = set()
-    
-    # propagate available variables through CFG
-    # entry block starts with empty set
-    bb_incoming_available[basic_blocks[0]] = set()
-    
-    # iteratively propagate until fixpoint
-    changed = True
-    while changed:
-        changed = False
-        for bb in basic_blocks:
-            # find predecessors
-            preds = []
-            for pred_bb in basic_blocks:
-                pred_type = cfg[pred_bb]
-                if isinstance(pred_type, _JumpBB) and pred_type.target == bb:
-                    preds.append(pred_bb)
-                elif isinstance(pred_type, _BranchBB) and (pred_type.target1 == bb or pred_type.target2 == bb):
-                    preds.append(pred_bb)
-            
-            if preds:
-                # available vars = intersection of all predecessors' available + defined vars
-                new_available = None
-                for pred in preds:
-                    # variables available at end of predecessor = incoming + defined in pred
-                    pred_defined = set(fuzzer.bb_available_vars.get(pred, []))
-                    pred_avail = bb_incoming_available[pred] | pred_defined
-                    
-                    if new_available is None:
-                        new_available = pred_avail
-                    else:
-                        new_available = new_available & pred_avail
-                
-                if new_available != bb_incoming_available[bb]:
-                    bb_incoming_available[bb] = new_available
-                    changed = True
-    
-    # update fuzzer's bb_available_vars to include incoming variables
-    for bb in basic_blocks:
-        incoming = list(bb_incoming_available[bb])
-        existing = fuzzer.bb_available_vars.get(bb, [])
-        # incoming vars are available at the start, then vars defined in the block
-        fuzzer.bb_available_vars[bb] = incoming + existing
-    
-    # second pass: add terminators using available variables
+    # add terminators
     for bb in basic_blocks:
         bb_type = cfg[bb]
 
@@ -486,6 +480,9 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
         else:
             raise Exception()  # unreachable
 
+    # resolve all symbolic variables to real ones
+    fuzzer.resolve_all_variables()
+    
     fuzzer.ensure_all_vars_have_values()
 
     return fuzzer.ctx, fuzzer.calldata_offset

From 798055b85b9ca01556e90200bf3d418afcd7180b Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Sun, 8 Jun 2025 23:14:25 +0200
Subject: [PATCH 15/24] lint

---
 tests/functional/venom/test_memory_fuzzer.py | 44 +++++++++++---------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index d070d01e81..710efce676 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -20,7 +20,15 @@
 from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
-from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass, AssignElimination, SingleUseExpansion, SimplifyCFGPass
+from vyper.venom.passes import (
+    AssignElimination,
+    DeadStoreElimination,
+    LoadElimination,
+    MakeSSA,
+    MemMergePass,
+    SimplifyCFGPass,
+    SingleUseExpansion,
+)
 
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
 
@@ -80,6 +88,7 @@ def has_back_edge(self) -> bool:
 
 class SymbolicVar(IRVariable):
     """Placeholder for a variable that will be resolved later"""
+
     pass
 
 
@@ -106,12 +115,12 @@ def get_next_variable(self) -> IRVariable:
         var = IRVariable(f"v{self.variable_counter}")
         self.available_vars.append(var)
         return var
-    
+
     def fresh_symbolic(self) -> SymbolicVar:
         """Create a new symbolic variable"""
         self.symbolic_counter += 1
         return SymbolicVar(f"%sym_{self.symbolic_counter}")
-    
+
     def resolve_all_variables(self):
         """After building all blocks, replace symbolic vars with real ones"""
         # Map all symbolic vars to real variables
@@ -122,7 +131,7 @@ def resolve_all_variables(self):
                     if inst.output not in self.symbolic_mapping:
                         self.symbolic_mapping[inst.output] = self.get_next_variable()
                     inst.output = self.symbolic_mapping[inst.output]
-                
+
                 # Handle inputs
                 new_operands = []
                 for op in inst.operands:
@@ -239,7 +248,6 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
         raise ValueError("unreachable")
 
 
-
 @st.composite
 def control_flow_graph(draw, basic_blocks):
     """
@@ -258,7 +266,7 @@ def control_flow_graph(draw, basic_blocks):
     forward_targets = {}
     for i, bb in enumerate(basic_blocks):
         forward_targets[bb] = basic_blocks[i + 1 :]
-    
+
     # All blocks except entry (to prevent back edges to entry)
     non_entry_blocks = basic_blocks[1:]
 
@@ -443,7 +451,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
     # generate instructions for each block
     for bb in basic_blocks:
         draw(basic_block_instructions(fuzzer, bb))
-    
+
     # add terminators
     for bb in basic_blocks:
         bb_type = cfg[bb]
@@ -482,7 +490,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
 
     # resolve all symbolic variables to real ones
     fuzzer.resolve_all_variables()
-    
+
     fuzzer.ensure_all_vars_have_values()
 
     return fuzzer.ctx, fuzzer.calldata_offset
@@ -571,7 +579,7 @@ def venom_with_calldata(draw):
 
 # Test with memory-related passes
 @pytest.mark.fuzzing
-#@pytest.mark.parametrize(
+# @pytest.mark.parametrize(
 #    "pass_list",
 #    [
 #        # Test individual memory passes
@@ -583,23 +591,19 @@ def venom_with_calldata(draw):
 #        [DeadStoreElimination, LoadElimination],
 #        [LoadElimination, MemMergePass],
 #    ],
-#)
+# )
 @hp.given(venom_data=venom_with_calldata())
-
 @hp.settings(
     max_examples=1000,
-    suppress_health_check=(
-        hp.HealthCheck.data_too_large,
-        hp.HealthCheck.too_slow,
-    ),
+    suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow),
     deadline=None,
-    phases=(     
+    phases=(
         hp.Phase.explicit,
         hp.Phase.reuse,
         hp.Phase.generate,
         hp.Phase.target,
-        # Phase.shrink,  # can force long waiting for examples                                                         
-    ),           
+        # Phase.shrink,  # can force long waiting for examples
+    ),
 )
 def test_memory_passes_fuzzing(venom_data, env):
     """
@@ -630,8 +634,8 @@ def generate_sample_ir() -> IRContext:
 if __name__ == "__main__":
     ctx = generate_sample_ir()
 
-    #func = list(ctx.functions.values())[0]
-    #print(func)
+    # func = list(ctx.functions.values())[0]
+    # print(func)
 
     checker = MemoryFuzzChecker([MemMergePass])
     checker.run_passes(ctx)

From 69383a89153c92240cb63861d0d86751f222bf29 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Mon, 9 Jun 2025 10:59:38 +0200
Subject: [PATCH 16/24] fix variable allocation

---
 tests/functional/venom/test_memory_fuzzer.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 710efce676..122daa4bbf 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -98,7 +98,6 @@ class MemoryFuzzer:
     def __init__(self):
         self.ctx = IRContext()
         self.function = None
-        self.variable_counter = 0
         self.bb_counter = 0
         self.calldata_offset = MAX_MEMORY_SIZE
         self.available_vars = []
@@ -110,9 +109,9 @@ def __init__(self):
         self.symbolic_mapping = {}  # SymbolicVar -> IRVariable
 
     def get_next_variable(self) -> IRVariable:
-        """Generate a new unique variable."""
-        self.variable_counter += 1
-        var = IRVariable(f"v{self.variable_counter}")
+        """Generate a new unique variable using the function's allocator."""
+        assert self.function is not None, "Function must be set before allocating variables"
+        var = self.function.get_next_variable()
         self.available_vars.append(var)
         return var
 
@@ -493,6 +492,10 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
 
     fuzzer.ensure_all_vars_have_values()
 
+    # freshen variable names for easier debugging
+    for fn in fuzzer.ctx.functions.values():
+        fn.freshen_varnames()
+
     return fuzzer.ctx, fuzzer.calldata_offset
 
 

From f6b9bdd9bd99fdb311df0a9683248661dcbe6d08 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Mon, 9 Jun 2025 11:33:30 +0200
Subject: [PATCH 17/24] simplify symbolic vars

---
 tests/functional/venom/test_memory_fuzzer.py | 24 +++++++-------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 122daa4bbf..851891f201 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -106,7 +106,6 @@ def __init__(self):
         self.bb_available_vars = {}
         # symbolic variable tracking
         self.symbolic_counter = 0
-        self.symbolic_mapping = {}  # SymbolicVar -> IRVariable
 
     def get_next_variable(self) -> IRVariable:
         """Generate a new unique variable using the function's allocator."""
@@ -122,26 +121,19 @@ def fresh_symbolic(self) -> SymbolicVar:
 
     def resolve_all_variables(self):
         """After building all blocks, replace symbolic vars with real ones"""
-        # Map all symbolic vars to real variables
+        # resolve "symbolic" vars to real variables
+        symbolic_mapping = defaultdict(self.get_next_variable)
         for bb in self.function.get_basic_blocks():
             for inst in bb.instructions:
-                # Handle output
+                # remap all "symbolic" variables
                 if inst.output and isinstance(inst.output, SymbolicVar):
-                    if inst.output not in self.symbolic_mapping:
-                        self.symbolic_mapping[inst.output] = self.get_next_variable()
-                    inst.output = self.symbolic_mapping[inst.output]
+                    inst.output = symbolic_mapping[inst.output]
 
-                # Handle inputs
                 new_operands = []
                 for op in inst.operands:
                     if isinstance(op, SymbolicVar):
-                        if op not in self.symbolic_mapping:
-                            # This symbolic var was never defined as output
-                            # Create a fresh variable for it
-                            self.symbolic_mapping[op] = self.get_next_variable()
-                        new_operands.append(self.symbolic_mapping[op])
-                    else:
-                        new_operands.append(op)
+                        op = symbolic_mapping[op]
+                    new_operands.append(op)
                 inst.operands = new_operands
 
     def ensure_all_vars_have_values(self) -> None:
@@ -244,7 +236,7 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
         bb.append_instruction("mcopy", dest, src, IRLiteral(length))
 
     else:
-        raise ValueError("unreachable")
+        raise Exception("unreachable")
 
 
 @st.composite
@@ -597,7 +589,7 @@ def venom_with_calldata(draw):
 # )
 @hp.given(venom_data=venom_with_calldata())
 @hp.settings(
-    max_examples=1000,
+    max_examples=10,
     suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow),
     deadline=None,
     phases=(

From dd8f3659d32ad72f4c7a87e09968a8b6f9bde6f6 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Mon, 9 Jun 2025 12:04:35 +0200
Subject: [PATCH 18/24] update back edge logic

---
 tests/functional/venom/test_memory_fuzzer.py | 33 ++++++++++++++------
 vyper/venom/venom_to_assembly.py             |  2 +-
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 851891f201..a6bd545055 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -5,7 +5,7 @@
 memory optimization passes. It uses the IRBasicBlock API directly and
 can be plugged with any Venom passes.
 """
-
+from collections import defaultdict
 from dataclasses import dataclass
 from typing import Optional
 
@@ -82,7 +82,7 @@ class _BranchBB(_BBType):
     counter_addr: Optional[int] = None
 
     @property
-    def has_back_edge(self) -> bool:
+    def needs_loop_counter(self) -> bool:
         return self.counter_addr is not None
 
 
@@ -284,12 +284,19 @@ def control_flow_graph(draw, basic_blocks):
             # For branches, allow any block as the other target except entry
             # (target is already guaranteed to be forward)
             other_target = draw(st.sampled_from(non_entry_blocks))
+
+            is_back_edge = basic_blocks.index(other_target) <= basic_blocks.index(source)
+            # counter_addr = loop_counter_addr if is_back_edge else None
+
+            # if other_target is the back edge, swap so back edge is always target1
+            if is_back_edge:
+                other_target, target = target, other_target
             cfg[source] = _BranchBB(target1=target, target2=other_target)
 
     # classify remaining blocks that were not handled during spanning
     # tree construction.
-    loop_counter_addr = MAX_MEMORY_SIZE
 
+    loop_counter_addr = MAX_MEMORY_SIZE
     for bb in basic_blocks:
         if bb in cfg:
             continue
@@ -313,6 +320,11 @@ def control_flow_graph(draw, basic_blocks):
                 is_back_edge2 = False
 
             contains_back_edge = is_back_edge1 or is_back_edge2
+
+            # swap targets so target2 is always a forward edge
+            if is_back_edge2 and not is_back_edge1:
+                target1, target2 = target2, target1
+
             counter_addr = loop_counter_addr if contains_back_edge else None
 
             cfg[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr)
@@ -458,22 +470,19 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
             # get bottom bit, for bias reasons
             cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
 
-            if bb_type.has_back_edge:
+            if bb_type.needs_loop_counter:
                 loop_counter_addr = IRLiteral(bb_type.counter_addr)
 
                 counter = bb.append_instruction("mload", loop_counter_addr)
                 incr_counter = bb.append_instruction("add", counter, IRLiteral(1))
                 bb.append_instruction("mstore", incr_counter, loop_counter_addr)
 
-                # exit loop when counter >= MAX_LOOP_ITERATIONS
-                # (note we are guaranteed that second target provides forward
-                # progress)
                 max_iterations = IRLiteral(MAX_LOOP_ITERATIONS)
-                # counter < iterbound
                 counter_ok = bb.append_instruction("lt", counter, max_iterations)
 
                 cond_var = bb.append_instruction("and", counter_ok, cond_var)
 
+            # when there is a back edge, target2 is always the forward edge
             bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label)
 
         else:
@@ -504,6 +513,10 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes:
             ac = IRAnalysesCache(fn)
             SimplifyCFGPass(ac, fn).run_pass()
             SingleUseExpansion(ac, fn).run_pass()
+            MakeSSA(ac, fn).run_pass()
+            fn.freshen_varnames()
+
+        hp.note(str(ctx))
 
         compiler = VenomCompiler([ctx])
         asm = compiler.generate_evm()
@@ -551,8 +564,10 @@ def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:
             ac = IRAnalysesCache(fn)
             MakeSSA(ac, fn).run_pass()
             AssignElimination(ac, fn).run_pass()
+        hp.note("UNOPTIMIZED: " + str(ctx))
 
         opt_ctx = self.run_passes(ctx)
+        hp.note("OPTIMIZED: " + str(opt_ctx))
 
         bytecode1 = self.compile_to_bytecode(ctx)
         bytecode2 = self.compile_to_bytecode(opt_ctx)
@@ -589,7 +604,7 @@ def venom_with_calldata(draw):
 # )
 @hp.given(venom_data=venom_with_calldata())
 @hp.settings(
-    max_examples=10,
+    max_examples=1000,
     suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow),
     deadline=None,
     phases=(
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index f4b28a3bce..b1ffd24baf 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -279,7 +279,7 @@ def _emit_input_operands(
                 self.dup_op(assembly, stack, op)
 
             # guaranteed by store expansion
-            assert op not in seen, (op, seen)
+            assert op not in seen, (op, inst)
             seen.add(op)
 
     def _prepare_stack_for_function(self, asm, fn: IRFunction, stack: StackModel):

From 27a09ca77f69ac5dfd1d6687ef9eb71a43d28339 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Thu, 26 Jun 2025 12:34:01 +0200
Subject: [PATCH 19/24] add cfg normalization pass

---
 tests/functional/venom/test_memory_fuzzer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index a6bd545055..1c86c8d2e2 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -21,6 +21,7 @@
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
 from vyper.venom.passes import (
+    CFGNormalization,
     AssignElimination,
     DeadStoreElimination,
     LoadElimination,
@@ -514,6 +515,7 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes:
             SimplifyCFGPass(ac, fn).run_pass()
             SingleUseExpansion(ac, fn).run_pass()
             MakeSSA(ac, fn).run_pass()
+            CFGNormalization(ac, fn).run_pass()
             fn.freshen_varnames()
 
         hp.note(str(ctx))

From cfa0d1d9ab144edd8dd40ca010fc71bf058b2b79 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Thu, 26 Jun 2025 12:34:10 +0200
Subject: [PATCH 20/24] add debug

---
 tests/functional/venom/test_memory_fuzzer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 1c86c8d2e2..ec696f7128 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -606,7 +606,7 @@ def venom_with_calldata(draw):
 # )
 @hp.given(venom_data=venom_with_calldata())
 @hp.settings(
-    max_examples=1000,
+    max_examples=50,
     suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow),
     deadline=None,
     phases=(
@@ -616,6 +616,7 @@ def venom_with_calldata(draw):
         hp.Phase.target,
         # Phase.shrink,  # can force long waiting for examples
     ),
+    verbosity=hp.Verbosity.debug,
 )
 def test_memory_passes_fuzzing(venom_data, env):
     """

From 215efd1efb52b3075f14b074fd74be2d38f7b76c Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Thu, 26 Jun 2025 16:51:50 +0200
Subject: [PATCH 21/24] update variable definitions

---
 tests/functional/venom/test_memory_fuzzer.py | 211 +++++++++++++++++--
 1 file changed, 192 insertions(+), 19 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index ec696f7128..72cc52dd66 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -21,8 +21,8 @@
 from vyper.venom.context import IRContext
 from vyper.venom.function import IRFunction
 from vyper.venom.passes import (
-    CFGNormalization,
     AssignElimination,
+    CFGNormalization,
     DeadStoreElimination,
     LoadElimination,
     MakeSSA,
@@ -105,6 +105,8 @@ def __init__(self):
         self.allocated_memory_slots = set()
         # track which variables are available in each block
         self.bb_available_vars = {}
+        # variables defined in entry block (available everywhere)
+        self.entry_vars = set()
         # symbolic variable tracking
         self.symbolic_counter = 0
 
@@ -120,23 +122,181 @@ def fresh_symbolic(self) -> SymbolicVar:
         self.symbolic_counter += 1
         return SymbolicVar(f"%sym_{self.symbolic_counter}")
 
-    def resolve_all_variables(self):
+    def compute_reachable_blocks(
+        self, cfg: dict[IRBasicBlock, _BBType]
+    ) -> dict[IRBasicBlock, set[IRBasicBlock]]:
+        """Compute which blocks are reachable from each block."""
+        reachable_from = {}
+
+        def get_successors(bb: IRBasicBlock) -> list[IRBasicBlock]:
+            """Get successor blocks based on CFG."""
+            bb_type = cfg.get(bb)
+            if isinstance(bb_type, _JumpBB):
+                return [bb_type.target]
+            elif isinstance(bb_type, _BranchBB):
+                return [bb_type.target1, bb_type.target2]
+            else:
+                return []
+
+        def compute_reachable(
+            block: IRBasicBlock, visited: set[IRBasicBlock] = None
+        ) -> set[IRBasicBlock]:
+            if visited is None:
+                visited = set()
+            if block in visited:
+                return set()
+            visited.add(block)
+
+            result = {block}
+            for succ in get_successors(block):
+                result.update(compute_reachable(succ, visited))
+            return result
+
+        for bb in self.function.get_basic_blocks():
+            reachable_from[bb] = compute_reachable(bb)
+
+        return reachable_from
+
+    def compute_variable_availability(
+        self, cfg: dict[IRBasicBlock, _BBType]
+    ) -> dict[IRBasicBlock, set[IRVariable]]:
+        """Compute which variables are available at each block after resolution."""
+        # First, compute reachability
+        reachable_from = self.compute_reachable_blocks(cfg)
+
+        # Find where each variable is defined
+        var_defs = {}  # var -> defining block
+        for bb in self.function.get_basic_blocks():
+            for inst in bb.instructions:
+                if inst.output and isinstance(inst.output, IRVariable):
+                    var_defs[inst.output] = bb
+
+        # Compute availability
+        available_at = {}  # block -> set of available vars
+        for bb in self.function.get_basic_blocks():
+            available_at[bb] = set()
+
+        # Variables are available in their defining block and all reachable blocks
+        for var, def_block in var_defs.items():
+            for bb in reachable_from[def_block]:
+                available_at[bb].add(var)
+
+        # Entry block variables are available everywhere
+        entry_bb = self.function.entry
+        for inst in entry_bb.instructions:
+            if inst.output and isinstance(inst.output, IRVariable):
+                for bb in self.function.get_basic_blocks():
+                    available_at[bb].add(inst.output)
+
+        return available_at
+
+    def propagate_available_vars(self, cfg: dict[IRBasicBlock, _BBType]) -> None:
+        """Update bb_available_vars to include variables from predecessors."""
+        # Build predecessor map
+        predecessors = defaultdict(list)
+        for bb, bb_type in cfg.items():
+            if isinstance(bb_type, _JumpBB):
+                predecessors[bb_type.target].append(bb)
+            elif isinstance(bb_type, _BranchBB):
+                predecessors[bb_type.target1].append(bb)
+                predecessors[bb_type.target2].append(bb)
+
+        # Initialize with local definitions
+        for bb in self.function.get_basic_blocks():
+            if bb not in self.bb_available_vars:
+                self.bb_available_vars[bb] = []
+
+        # Add entry block variables to all blocks
+        entry_vars = self.bb_available_vars.get(self.function.entry, [])
+        for bb in self.function.get_basic_blocks():
+            if bb != self.function.entry:
+                # Add entry vars that aren't already there
+                for var in entry_vars:
+                    if var not in self.bb_available_vars[bb]:
+                        self.bb_available_vars[bb].append(var)
+
+        # Fixed-point iteration to propagate variables
+        changed = True
+        while changed:
+            changed = False
+            for bb in self.function.get_basic_blocks():
+                if bb == self.function.entry:
+                    continue
+
+                # Variables available at block entry = intersection of predecessor outputs
+                if bb in predecessors and predecessors[bb]:
+                    # Start with variables from first predecessor
+                    available = set(self.bb_available_vars.get(predecessors[bb][0], []))
+
+                    # Intersect with other predecessors
+                    for pred in predecessors[bb][1:]:
+                        available &= set(self.bb_available_vars.get(pred, []))
+
+                    # Add available vars that aren't already tracked
+                    for var in available:
+                        if var not in self.bb_available_vars[bb]:
+                            self.bb_available_vars[bb].append(var)
+                            changed = True
+
+    def resolve_all_variables(self, cfg: dict[IRBasicBlock, _BBType]):
         """After building all blocks, replace symbolic vars with real ones"""
-        # resolve "symbolic" vars to real variables
-        symbolic_mapping = defaultdict(self.get_next_variable)
+        # Compute which variables are available at each block
+        available_at = self.compute_variable_availability(cfg)
+
+        # Track which real variable each symbolic var maps to globally
+        # We need a global mapping because symbolic vars can be used across blocks
+        symbolic_mapping = {}
+
+        # Track variables we've allocated but not yet assigned
+        unassigned_vars = set()
+
+        # First pass: resolve all output variables
         for bb in self.function.get_basic_blocks():
             for inst in bb.instructions:
-                # remap all "symbolic" variables
                 if inst.output and isinstance(inst.output, SymbolicVar):
-                    inst.output = symbolic_mapping[inst.output]
+                    # Create a new variable for outputs
+                    symbolic_var = inst.output
+                    real_var = self.get_next_variable()
+                    inst.output = real_var
+                    symbolic_mapping[symbolic_var] = real_var
+                    # This variable is now available in this block and all reachable blocks
+
+        # Second pass: resolve all operand variables based on what's available
+        for bb in self.function.get_basic_blocks():
+            available_vars = list(available_at.get(bb, set()))
 
+            # Collect instructions that need calldataloads inserted before them
+            insertions = []  # List of (index, instruction) pairs
+
+            for i, inst in enumerate(bb.instructions):
                 new_operands = []
                 for op in inst.operands:
                     if isinstance(op, SymbolicVar):
-                        op = symbolic_mapping[op]
+                        # Check if we already resolved this symbolic var
+                        if op in symbolic_mapping:
+                            real_var = symbolic_mapping[op]
+                        else:
+                            # This symbolic var hasn't been resolved yet
+                            # It must be used before being defined as an output
+                            # Create a fresh variable and initialize it
+                            real_var = self.get_next_variable()
+                            symbolic_mapping[op] = real_var
+
+                            # Schedule calldataload insertion before this instruction
+                            load_inst = IRInstruction(
+                                "calldataload", [IRLiteral(self.calldata_offset)], real_var
+                            )
+                            insertions.append((i, load_inst))
+                            self.calldata_offset += 32
+
+                        op = real_var
                     new_operands.append(op)
                 inst.operands = new_operands
 
+            # Insert calldataloads in reverse order to preserve indices
+            for idx, load_inst in reversed(insertions):
+                bb.insert_instruction(load_inst, index=idx)
+
     def ensure_all_vars_have_values(self) -> None:
         """Ensure all available variables have values by using calldataload for unassigned ones."""
         assigned_vars = set()
@@ -160,17 +320,17 @@ def get_next_bb_label(self) -> IRLabel:
 
     def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable | SymbolicVar:
         """Get a random available variable or create a new one."""
-        available_in_bb = self.bb_available_vars.get(bb, [])
-        if available_in_bb and draw(st.booleans()):
-            return draw(st.sampled_from(available_in_bb))
-        else:
-            return self.fresh_symbolic()
+        if bb not in self.bb_available_vars:
+            self.bb_available_vars[bb] = []
+
+        # During generation phase, always return symbolic variables
+        # They will be resolved to appropriate real variables later based on availability
+        return self.fresh_symbolic()
 
     def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
-        available_in_bb = self.bb_available_vars.get(bb, [])
-        if available_in_bb and draw(st.booleans()):
-            return draw(st.sampled_from(available_in_bb))
+        # For now, only return literals to avoid cross-block availability issues
+        # TODO: Once we have proper availability tracking, we can use variables again
 
         if self.allocated_memory_slots and draw(st.booleans()):
             # bias towards addresses near existing allocations to create aliasing opportunities
@@ -467,21 +627,32 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
             bb.append_instruction("jmp", bb_type.target.label)
 
         elif isinstance(bb_type, _BranchBB):
+            # Ensure we have available vars tracked for this block
+            if bb not in fuzzer.bb_available_vars:
+                fuzzer.bb_available_vars[bb] = []
+
             cond_var = fuzzer.get_random_variable(draw, bb)
             # get bottom bit, for bias reasons
             cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
+            fuzzer.bb_available_vars[bb].append(cond_var)
 
             if bb_type.needs_loop_counter:
                 loop_counter_addr = IRLiteral(bb_type.counter_addr)
 
                 counter = bb.append_instruction("mload", loop_counter_addr)
+                fuzzer.bb_available_vars[bb].append(counter)
+
                 incr_counter = bb.append_instruction("add", counter, IRLiteral(1))
+                fuzzer.bb_available_vars[bb].append(incr_counter)
+
                 bb.append_instruction("mstore", incr_counter, loop_counter_addr)
 
                 max_iterations = IRLiteral(MAX_LOOP_ITERATIONS)
                 counter_ok = bb.append_instruction("lt", counter, max_iterations)
+                fuzzer.bb_available_vars[bb].append(counter_ok)
 
                 cond_var = bb.append_instruction("and", counter_ok, cond_var)
+                fuzzer.bb_available_vars[bb].append(cond_var)
 
             # when there is a back edge, target2 is always the forward edge
             bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label)
@@ -489,8 +660,11 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
         else:
             raise Exception()  # unreachable
 
+    # propagate available variables through the CFG
+    fuzzer.propagate_available_vars(cfg)
+
     # resolve all symbolic variables to real ones
-    fuzzer.resolve_all_variables()
+    fuzzer.resolve_all_variables(cfg)
 
     fuzzer.ensure_all_vars_have_values()
 
@@ -509,12 +683,11 @@ def __init__(self, passes: list[type]):
 
     def compile_to_bytecode(self, ctx: IRContext) -> bytes:
         """Compile Venom IR context to EVM bytecode."""
-        # assumes MakeSSA has already been run
         for fn in ctx.functions.values():
             ac = IRAnalysesCache(fn)
             SimplifyCFGPass(ac, fn).run_pass()
-            SingleUseExpansion(ac, fn).run_pass()
             MakeSSA(ac, fn).run_pass()
+            SingleUseExpansion(ac, fn).run_pass()
             CFGNormalization(ac, fn).run_pass()
             fn.freshen_varnames()
 
@@ -616,7 +789,7 @@ def venom_with_calldata(draw):
         hp.Phase.target,
         # Phase.shrink,  # can force long waiting for examples
     ),
-    verbosity=hp.Verbosity.debug,
+    # verbosity=hp.Verbosity.debug,
 )
 def test_memory_passes_fuzzing(venom_data, env):
     """

From 6c08d3b942e211292a01d53b911084d83c8cdc33 Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Thu, 26 Jun 2025 21:54:58 +0200
Subject: [PATCH 22/24] fix[venom]: fix last_variable in function copy

---
 vyper/venom/function.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vyper/venom/function.py b/vyper/venom/function.py
index 61a3e74111..f6ff704979 100644
--- a/vyper/venom/function.py
+++ b/vyper/venom/function.py
@@ -162,6 +162,9 @@ def error_msg(self) -> Optional[str]:
 
     def copy(self):
         new = IRFunction(self.name)
+
+        new.last_variable = self.last_variable
+
         new.clear_basic_blocks()
         for bb in self.get_basic_blocks():
             new_bb = bb.copy()

From 351c58e139a32271f6da874baa80bbf390da4a9f Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Thu, 26 Jun 2025 17:29:12 +0200
Subject: [PATCH 23/24] wip - rework variable allocation

---
 tests/functional/venom/test_memory_fuzzer.py | 257 +++----------------
 1 file changed, 41 insertions(+), 216 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 72cc52dd66..95bd61397c 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -101,230 +101,67 @@ def __init__(self):
         self.function = None
         self.bb_counter = 0
         self.calldata_offset = MAX_MEMORY_SIZE
-        self.available_vars = []
         self.allocated_memory_slots = set()
-        # track which variables are available in each block
-        self.bb_available_vars = {}
-        # variables defined in entry block (available everywhere)
-        self.entry_vars = set()
         # symbolic variable tracking
         self.symbolic_counter = 0
 
     def get_next_variable(self) -> IRVariable:
         """Generate a new unique variable using the function's allocator."""
         assert self.function is not None, "Function must be set before allocating variables"
-        var = self.function.get_next_variable()
-        self.available_vars.append(var)
-        return var
+        return self.function.get_next_variable()
 
     def fresh_symbolic(self) -> SymbolicVar:
         """Create a new symbolic variable"""
         self.symbolic_counter += 1
         return SymbolicVar(f"%sym_{self.symbolic_counter}")
 
-    def compute_reachable_blocks(
-        self, cfg: dict[IRBasicBlock, _BBType]
-    ) -> dict[IRBasicBlock, set[IRBasicBlock]]:
-        """Compute which blocks are reachable from each block."""
-        reachable_from = {}
-
-        def get_successors(bb: IRBasicBlock) -> list[IRBasicBlock]:
-            """Get successor blocks based on CFG."""
-            bb_type = cfg.get(bb)
-            if isinstance(bb_type, _JumpBB):
-                return [bb_type.target]
-            elif isinstance(bb_type, _BranchBB):
-                return [bb_type.target1, bb_type.target2]
-            else:
-                return []
-
-        def compute_reachable(
-            block: IRBasicBlock, visited: set[IRBasicBlock] = None
-        ) -> set[IRBasicBlock]:
-            if visited is None:
-                visited = set()
-            if block in visited:
-                return set()
-            visited.add(block)
-
-            result = {block}
-            for succ in get_successors(block):
-                result.update(compute_reachable(succ, visited))
-            return result
-
-        for bb in self.function.get_basic_blocks():
-            reachable_from[bb] = compute_reachable(bb)
-
-        return reachable_from
-
-    def compute_variable_availability(
-        self, cfg: dict[IRBasicBlock, _BBType]
-    ) -> dict[IRBasicBlock, set[IRVariable]]:
-        """Compute which variables are available at each block after resolution."""
-        # First, compute reachability
-        reachable_from = self.compute_reachable_blocks(cfg)
-
-        # Find where each variable is defined
-        var_defs = {}  # var -> defining block
-        for bb in self.function.get_basic_blocks():
-            for inst in bb.instructions:
-                if inst.output and isinstance(inst.output, IRVariable):
-                    var_defs[inst.output] = bb
-
-        # Compute availability
-        available_at = {}  # block -> set of available vars
-        for bb in self.function.get_basic_blocks():
-            available_at[bb] = set()
-
-        # Variables are available in their defining block and all reachable blocks
-        for var, def_block in var_defs.items():
-            for bb in reachable_from[def_block]:
-                available_at[bb].add(var)
-
-        # Entry block variables are available everywhere
-        entry_bb = self.function.entry
-        for inst in entry_bb.instructions:
-            if inst.output and isinstance(inst.output, IRVariable):
-                for bb in self.function.get_basic_blocks():
-                    available_at[bb].add(inst.output)
-
-        return available_at
-
-    def propagate_available_vars(self, cfg: dict[IRBasicBlock, _BBType]) -> None:
-        """Update bb_available_vars to include variables from predecessors."""
-        # Build predecessor map
-        predecessors = defaultdict(list)
-        for bb, bb_type in cfg.items():
-            if isinstance(bb_type, _JumpBB):
-                predecessors[bb_type.target].append(bb)
-            elif isinstance(bb_type, _BranchBB):
-                predecessors[bb_type.target1].append(bb)
-                predecessors[bb_type.target2].append(bb)
-
-        # Initialize with local definitions
-        for bb in self.function.get_basic_blocks():
-            if bb not in self.bb_available_vars:
-                self.bb_available_vars[bb] = []
-
-        # Add entry block variables to all blocks
-        entry_vars = self.bb_available_vars.get(self.function.entry, [])
-        for bb in self.function.get_basic_blocks():
-            if bb != self.function.entry:
-                # Add entry vars that aren't already there
-                for var in entry_vars:
-                    if var not in self.bb_available_vars[bb]:
-                        self.bb_available_vars[bb].append(var)
-
-        # Fixed-point iteration to propagate variables
-        changed = True
-        while changed:
-            changed = False
-            for bb in self.function.get_basic_blocks():
-                if bb == self.function.entry:
-                    continue
-
-                # Variables available at block entry = intersection of predecessor outputs
-                if bb in predecessors and predecessors[bb]:
-                    # Start with variables from first predecessor
-                    available = set(self.bb_available_vars.get(predecessors[bb][0], []))
-
-                    # Intersect with other predecessors
-                    for pred in predecessors[bb][1:]:
-                        available &= set(self.bb_available_vars.get(pred, []))
-
-                    # Add available vars that aren't already tracked
-                    for var in available:
-                        if var not in self.bb_available_vars[bb]:
-                            self.bb_available_vars[bb].append(var)
-                            changed = True
-
     def resolve_all_variables(self, cfg: dict[IRBasicBlock, _BBType]):
         """After building all blocks, replace symbolic vars with real ones"""
-        # Compute which variables are available at each block
-        available_at = self.compute_variable_availability(cfg)
-
-        # Track which real variable each symbolic var maps to globally
-        # We need a global mapping because symbolic vars can be used across blocks
+        # Simple global mapping - each symbolic var gets one real var
         symbolic_mapping = {}
 
-        # Track variables we've allocated but not yet assigned
-        unassigned_vars = set()
-
-        # First pass: resolve all output variables
-        for bb in self.function.get_basic_blocks():
-            for inst in bb.instructions:
-                if inst.output and isinstance(inst.output, SymbolicVar):
-                    # Create a new variable for outputs
-                    symbolic_var = inst.output
-                    real_var = self.get_next_variable()
-                    inst.output = real_var
-                    symbolic_mapping[symbolic_var] = real_var
-                    # This variable is now available in this block and all reachable blocks
-
-        # Second pass: resolve all operand variables based on what's available
         for bb in self.function.get_basic_blocks():
-            available_vars = list(available_at.get(bb, set()))
-
-            # Collect instructions that need calldataloads inserted before them
-            insertions = []  # List of (index, instruction) pairs
+            insertions = []
 
             for i, inst in enumerate(bb.instructions):
+                # First, handle output to allocate variable if needed
+                output_sym = None
+                if inst.output and isinstance(inst.output, SymbolicVar):
+                    output_sym = inst.output
+                    if inst.output not in symbolic_mapping:
+                        symbolic_mapping[inst.output] = self.get_next_variable()
+                    inst.output = symbolic_mapping[inst.output]
+
+                # Then resolve operands
                 new_operands = []
                 for op in inst.operands:
                     if isinstance(op, SymbolicVar):
-                        # Check if we already resolved this symbolic var
-                        if op in symbolic_mapping:
-                            real_var = symbolic_mapping[op]
-                        else:
-                            # This symbolic var hasn't been resolved yet
-                            # It must be used before being defined as an output
-                            # Create a fresh variable and initialize it
+                        if op not in symbolic_mapping:
+                            # First use - create variable and schedule initialization
                             real_var = self.get_next_variable()
                             symbolic_mapping[op] = real_var
-
-                            # Schedule calldataload insertion before this instruction
                             load_inst = IRInstruction(
                                 "calldataload", [IRLiteral(self.calldata_offset)], real_var
                             )
                             insertions.append((i, load_inst))
                             self.calldata_offset += 32
-
-                        op = real_var
+                        op = symbolic_mapping[op]
                     new_operands.append(op)
                 inst.operands = new_operands
 
-            # Insert calldataloads in reverse order to preserve indices
+            # Insert calldataloads
             for idx, load_inst in reversed(insertions):
                 bb.insert_instruction(load_inst, index=idx)
 
-    def ensure_all_vars_have_values(self) -> None:
-        """Ensure all available variables have values by using calldataload for unassigned ones."""
-        assigned_vars = set()
-        for bb in self.function.get_basic_blocks():
-            for inst in bb.instructions:
-                if inst.output:
-                    assigned_vars.add(inst.output)
-
-        entry_bb = self.function.entry
-        unassigned_vars = [var for var in self.available_vars if var not in assigned_vars]
-
-        for i, var in enumerate(unassigned_vars):
-            inst = IRInstruction("calldataload", [IRLiteral(self.calldata_offset)], var)
-            entry_bb.insert_instruction(inst, index=i)
-            self.calldata_offset += 32
-
     def get_next_bb_label(self) -> IRLabel:
         """Generate a new unique basic block label."""
         self.bb_counter += 1
         return IRLabel(f"bb{self.bb_counter}")
 
-    def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable | SymbolicVar:
-        """Get a random available variable or create a new one."""
-        if bb not in self.bb_available_vars:
-            self.bb_available_vars[bb] = []
-
-        # During generation phase, always return symbolic variables
-        # They will be resolved to appropriate real variables later based on availability
+    def get_random_variable(self, draw, bb: IRBasicBlock) -> SymbolicVar:
+        """Get a symbolic variable that will be resolved later."""
+        # Always return symbolic variables during generation phase
+        # They will be resolved to real variables with proper initialization
         return self.fresh_symbolic()
 
     def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral:
@@ -369,22 +206,15 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
     """Generate and append a memory instruction to current basic block."""
     op = draw(st.sampled_from(MEMORY_OPS))
 
-    # track variables defined so far in this block
-    if bb not in fuzzer.bb_available_vars:
-        fuzzer.bb_available_vars[bb] = []
-
     if op == "mload":
         addr = fuzzer.get_memory_address(draw, bb)
         result_var = fuzzer.fresh_symbolic()
         bb.append_instruction("mload", addr, ret=result_var)
-        # add to variables available in this block
-        fuzzer.bb_available_vars[bb].append(result_var)
 
     elif op == "mstore":
-        # can use variables defined earlier in this block
-        available_in_bb = fuzzer.bb_available_vars.get(bb, [])
-        if available_in_bb and draw(st.booleans()):
-            value = draw(st.sampled_from(available_in_bb))
+        # Use either a symbolic variable or a literal
+        if draw(st.booleans()):
+            value = fuzzer.get_random_variable(draw, bb)
         else:
             value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256 - 1)))
         addr = fuzzer.get_memory_address(draw, bb)
@@ -538,10 +368,14 @@ def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
         # unreachable
         raise Exception(f"Unknown precompile: {precompile_name}")
 
-    gas = bb.append_instruction("gas")
+    gas = fuzzer.fresh_symbolic()
+    bb.append_instruction("gas", ret=gas)
     addr = IRLiteral(precompile_addr)
 
-    bb.append_instruction("staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size)
+    success = fuzzer.fresh_symbolic()
+    bb.append_instruction(
+        "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size, ret=success
+    )
 
 
 @st.composite
@@ -627,47 +461,38 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
             bb.append_instruction("jmp", bb_type.target.label)
 
         elif isinstance(bb_type, _BranchBB):
-            # Ensure we have available vars tracked for this block
-            if bb not in fuzzer.bb_available_vars:
-                fuzzer.bb_available_vars[bb] = []
-
             cond_var = fuzzer.get_random_variable(draw, bb)
             # get bottom bit, for bias reasons
-            cond_var = bb.append_instruction("and", cond_var, IRLiteral(1))
-            fuzzer.bb_available_vars[bb].append(cond_var)
+            cond_result = fuzzer.fresh_symbolic()
+            bb.append_instruction("and", cond_var, IRLiteral(1), ret=cond_result)
 
             if bb_type.needs_loop_counter:
                 loop_counter_addr = IRLiteral(bb_type.counter_addr)
 
-                counter = bb.append_instruction("mload", loop_counter_addr)
-                fuzzer.bb_available_vars[bb].append(counter)
-
-                incr_counter = bb.append_instruction("add", counter, IRLiteral(1))
-                fuzzer.bb_available_vars[bb].append(incr_counter)
+                counter = fuzzer.fresh_symbolic()
+                bb.append_instruction("mload", loop_counter_addr, ret=counter)
 
+                incr_counter = fuzzer.fresh_symbolic()
+                bb.append_instruction("add", counter, IRLiteral(1), ret=incr_counter)
                 bb.append_instruction("mstore", incr_counter, loop_counter_addr)
 
                 max_iterations = IRLiteral(MAX_LOOP_ITERATIONS)
-                counter_ok = bb.append_instruction("lt", counter, max_iterations)
-                fuzzer.bb_available_vars[bb].append(counter_ok)
+                counter_ok = fuzzer.fresh_symbolic()
+                bb.append_instruction("lt", counter, max_iterations, ret=counter_ok)
 
-                cond_var = bb.append_instruction("and", counter_ok, cond_var)
-                fuzzer.bb_available_vars[bb].append(cond_var)
+                final_cond = fuzzer.fresh_symbolic()
+                bb.append_instruction("and", counter_ok, cond_result, ret=final_cond)
+                cond_result = final_cond
 
             # when there is a back edge, target2 is always the forward edge
-            bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label)
+            bb.append_instruction("jnz", cond_result, bb_type.target1.label, bb_type.target2.label)
 
         else:
             raise Exception()  # unreachable
 
-    # propagate available variables through the CFG
-    fuzzer.propagate_available_vars(cfg)
-
     # resolve all symbolic variables to real ones
     fuzzer.resolve_all_variables(cfg)
 
-    fuzzer.ensure_all_vars_have_values()
-
     # freshen variable names for easier debugging
     for fn in fuzzer.ctx.functions.values():
         fn.freshen_varnames()

From fd322dea23a6333a394a3d6732518a3455b985ac Mon Sep 17 00:00:00 2001
From: Charles Cooper <cooper.charles.m@gmail.com>
Date: Thu, 26 Jun 2025 23:57:16 +0200
Subject: [PATCH 24/24] some polishing

---
 tests/functional/venom/test_memory_fuzzer.py | 217 ++++++++++++-------
 1 file changed, 144 insertions(+), 73 deletions(-)

diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py
index 95bd61397c..fba2ae4cfb 100644
--- a/tests/functional/venom/test_memory_fuzzer.py
+++ b/tests/functional/venom/test_memory_fuzzer.py
@@ -4,8 +4,14 @@
 This fuzzer generates complex control flow with memory instructions to test
 memory optimization passes. It uses the IRBasicBlock API directly and
 can be plugged with any Venom passes.
+
+The fuzzer works in two phases:
+1. Generation Phase: Creates IR with symbolic variables that can be used before definition
+2. Resolution Phase: Replaces symbolic variables with real variables and inserts initialization
+
+This two-phase approach enables complex cross-block dataflow patterns that would be
+difficult to generate with a single pass.
 """
-from collections import defaultdict
 from dataclasses import dataclass
 from typing import Optional
 
@@ -14,6 +20,7 @@
 import pytest
 
 from tests.evm_backends.base_env import EvmError
+from tests.venom_utils import assert_ctx_eq
 from vyper.ir.compile_ir import assembly_to_evm
 from vyper.venom import VenomCompiler
 from vyper.venom.analysis import IRAnalysesCache
@@ -31,6 +38,10 @@
     SingleUseExpansion,
 )
 
+# ============================================================================
+# Constants
+# ============================================================================
+
 MEMORY_OPS = ["mload", "mstore", "mcopy"]
 
 # precompiles act as fence operations that generate real output data,
@@ -53,6 +64,11 @@
 MAX_LOOP_ITERATIONS = 12
 
 
+# ============================================================================
+# Basic Block Types
+# ============================================================================
+
+
 @dataclass
 class _BBType:
     """Base class for basic block types in the CFG."""
@@ -76,7 +92,11 @@ class _JumpBB(_BBType):
 
 @dataclass
 class _BranchBB(_BBType):
-    """Basic block with conditional branch."""
+    """Basic block with conditional branch.
+
+    Convention: If there's a back edge, target1 is the back edge and
+    target2 is the forward edge. This ensures consistent loop structure.
+    """
 
     target1: IRBasicBlock
     target2: IRBasicBlock
@@ -87,22 +107,44 @@ def needs_loop_counter(self) -> bool:
         return self.counter_addr is not None
 
 
+# ============================================================================
+# Symbolic Variables
+# ============================================================================
+
+
 class SymbolicVar(IRVariable):
-    """Placeholder for a variable that will be resolved later"""
+    """Placeholder for a variable that will be resolved later.
+
+    Symbolic variables enable cross-block dataflow patterns by allowing
+    uses before definitions. During the resolution phase, each symbolic
+    variable is replaced with a real variable and initialized via calldataload
+    if it's used before being defined.
+    """
 
     pass
 
 
+# ============================================================================
+# Memory Fuzzer
+# ============================================================================
+
+
 class MemoryFuzzer:
-    """Generates random Venom IR with memory operations using IRBasicBlock API."""
+    """Generates random Venom IR with memory operations using IRBasicBlock API.
+
+    This fuzzer creates complex control flow patterns with memory operations
+    to stress-test memory optimization passes. It works in two phases:
+
+    1. Generation: Build IR with symbolic variables, allowing flexible dataflow
+    2. Resolution: Replace symbolic variables with real ones and add initialization
+    """
 
     def __init__(self):
         self.ctx = IRContext()
         self.function = None
         self.bb_counter = 0
-        self.calldata_offset = MAX_MEMORY_SIZE
+        self.calldata_offset = MAX_MEMORY_SIZE  # Start after memory seed data
         self.allocated_memory_slots = set()
-        # symbolic variable tracking
         self.symbolic_counter = 0
 
     def get_next_variable(self) -> IRVariable:
@@ -115,7 +157,7 @@ def fresh_symbolic(self) -> SymbolicVar:
         self.symbolic_counter += 1
         return SymbolicVar(f"%sym_{self.symbolic_counter}")
 
-    def resolve_all_variables(self, cfg: dict[IRBasicBlock, _BBType]):
+    def resolve_all_variables(self, block_types: dict[IRBasicBlock, _BBType]):
         """After building all blocks, replace symbolic vars with real ones"""
         # Simple global mapping - each symbolic var gets one real var
         symbolic_mapping = {}
@@ -166,8 +208,7 @@ def get_random_variable(self, draw, bb: IRBasicBlock) -> SymbolicVar:
 
     def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral:
         """Get a memory address, biased towards interesting optimizer-relevant locations."""
-        # For now, only return literals to avoid cross-block availability issues
-        # TODO: Once we have proper availability tracking, we can use variables again
+        # Currently only returns literals to keep fuzzing patterns simple
 
         if self.allocated_memory_slots and draw(st.booleans()):
             # bias towards addresses near existing allocations to create aliasing opportunities
@@ -227,7 +268,12 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
         bb.append_instruction("mcopy", dest, src, IRLiteral(length))
 
     else:
-        raise Exception("unreachable")
+        raise AssertionError(f"Unknown memory operation: {op}")
+
+
+# ============================================================================
+# Control Flow Generation
+# ============================================================================
 
 
 @st.composite
@@ -239,10 +285,10 @@ def control_flow_graph(draw, basic_blocks):
     3. Proper use of jump and branch instructions
     4. No back edges to entry block
     """
-    cfg: dict[IRBasicBlock, _BBType] = {}
+    block_types: dict[IRBasicBlock, _BBType] = {}
 
     # last block is always a return block - guarantees all other blocks have forward targets
-    cfg[basic_blocks[-1]] = _ReturnBB()
+    block_types[basic_blocks[-1]] = _ReturnBB()
 
     # cache forward targets for each block for performance
     forward_targets = {}
@@ -260,43 +306,42 @@ def control_flow_graph(draw, basic_blocks):
         source = draw(st.sampled_from(reachable_blocks))
 
         # we have already visited it
-        if source in cfg:
+        if source in block_types:
             continue
 
         target = draw(st.sampled_from(remaining_blocks))
 
-        # target is now reachable, but it may not be in cfg yet
+        # target is now reachable, but it may not be in block_types yet
         reachable_blocks.append(target)
         remaining_blocks.remove(target)
 
         if draw(st.booleans()):
-            cfg[source] = _JumpBB(target=target)
+            block_types[source] = _JumpBB(target=target)
         else:
             # For branches, allow any block as the other target except entry
             # (target is already guaranteed to be forward)
             other_target = draw(st.sampled_from(non_entry_blocks))
 
             is_back_edge = basic_blocks.index(other_target) <= basic_blocks.index(source)
-            # counter_addr = loop_counter_addr if is_back_edge else None
 
             # if other_target is the back edge, swap so back edge is always target1
             if is_back_edge:
                 other_target, target = target, other_target
-            cfg[source] = _BranchBB(target1=target, target2=other_target)
+            block_types[source] = _BranchBB(target1=target, target2=other_target)
 
     # classify remaining blocks that were not handled during spanning
     # tree construction.
 
     loop_counter_addr = MAX_MEMORY_SIZE
     for bb in basic_blocks:
-        if bb in cfg:
+        if bb in block_types:
             continue
 
         edge_type = draw(st.sampled_from(["jump", "branch"]))
 
         if edge_type == "jump":
             target = draw(st.sampled_from(forward_targets[bb]))
-            cfg[bb] = _JumpBB(target=target)
+            block_types[bb] = _JumpBB(target=target)
         else:  # branch
             # Choose targets, but never allow entry as a target
             target1 = draw(st.sampled_from(non_entry_blocks))
@@ -318,12 +363,12 @@ def control_flow_graph(draw, basic_blocks):
 
             counter_addr = loop_counter_addr if contains_back_edge else None
 
-            cfg[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr)
+            block_types[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr)
 
             if contains_back_edge:
                 loop_counter_addr += 32
 
-    return cfg
+    return block_types
 
 
 @st.composite
@@ -365,16 +410,21 @@ def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None:
         input_size = IRLiteral(213)  # blake2f requires specific input size
         output_size = IRLiteral(64)
     else:
-        # unreachable
-        raise Exception(f"Unknown precompile: {precompile_name}")
+        raise AssertionError(f"Unknown precompile: {precompile_name}")
 
     gas = fuzzer.fresh_symbolic()
     bb.append_instruction("gas", ret=gas)
     addr = IRLiteral(precompile_addr)
 
     success = fuzzer.fresh_symbolic()
-    bb.append_instruction(
-        "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size, ret=success
+    bb.append_instruction( "staticcall",
+        output_size,
+        output_ofst,
+        input_size,
+        input_ofst,
+        addr,
+        gas,
+        ret=success
     )
 
 
@@ -391,7 +441,12 @@ def basic_block_instructions(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> No
         elif inst_type == "precompile":
             draw(precompile_call(fuzzer, bb))
         else:
-            raise Exception("unreachable")
+            raise AssertionError(f"Unknown instruction type: {inst_type}")
+
+
+# ============================================================================
+# Main Generation Function
+# ============================================================================
 
 
 @st.composite
@@ -405,11 +460,13 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
     """
     fuzzer = MemoryFuzzer()
 
+    # ---- Setup function and context ----
     func_name = IRLabel("_fuzz_function", is_symbol=True)
     fuzzer.function = IRFunction(func_name, fuzzer.ctx)
     fuzzer.ctx.functions[func_name] = fuzzer.function
     fuzzer.ctx.entry_function = fuzzer.function
 
+    # ---- Generate basic blocks ----
     num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS))
     basic_blocks = []
 
@@ -428,8 +485,10 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
 
     assert fuzzer.function.entry is basic_blocks[0]
 
-    cfg = draw(control_flow_graph(basic_blocks))
+    # ---- Generate control flow ----
+    block_types = draw(control_flow_graph(basic_blocks))
 
+    # ---- Initialize memory and loop counters ----
     entry_block = basic_blocks[0]
     entry_block.append_instruction(
         "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE)
@@ -437,7 +496,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
 
     # extract loop counter addresses and initialize them
     counter_addrs = set()
-    for bb_type in cfg.values():
+    for bb_type in block_types.values():
         if isinstance(bb_type, _BranchBB) and bb_type.counter_addr is not None:
             addr = bb_type.counter_addr
             assert addr not in counter_addrs, f"Duplicate counter address {addr}"
@@ -446,13 +505,13 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
     for addr in counter_addrs:
         entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr))
 
-    # generate instructions for each block
+    # ---- Generate instructions ----
     for bb in basic_blocks:
         draw(basic_block_instructions(fuzzer, bb))
 
-    # add terminators
+    # ---- Add terminators ----
     for bb in basic_blocks:
-        bb_type = cfg[bb]
+        bb_type = block_types[bb]
 
         if isinstance(bb_type, _ReturnBB):
             bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0))
@@ -488,10 +547,10 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
             bb.append_instruction("jnz", cond_result, bb_type.target1.label, bb_type.target2.label)
 
         else:
-            raise Exception()  # unreachable
+            raise AssertionError(f"Unknown basic block type: {type(bb_type)}")
 
-    # resolve all symbolic variables to real ones
-    fuzzer.resolve_all_variables(cfg)
+    # ---- Phase 2: Resolve symbolic variables ----
+    fuzzer.resolve_all_variables(block_types)
 
     # freshen variable names for easier debugging
     for fn in fuzzer.ctx.functions.values():
@@ -500,6 +559,11 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]:
     return fuzzer.ctx, fuzzer.calldata_offset
 
 
+# ============================================================================
+# Memory Pass Checker
+# ============================================================================
+
+
 class MemoryFuzzChecker:
     """A pluggable checker for memory passes using fuzzing."""
 
@@ -514,9 +578,8 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes:
             MakeSSA(ac, fn).run_pass()
             SingleUseExpansion(ac, fn).run_pass()
             CFGNormalization(ac, fn).run_pass()
-            fn.freshen_varnames()
 
-        hp.note(str(ctx))
+        # hp.note(str(ctx))
 
         compiler = VenomCompiler([ctx])
         asm = compiler.generate_evm()
@@ -554,8 +617,9 @@ def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool,
         try:
             result = env.message_call(to=deployed_address, data=calldata)
             return True, result
-        except EvmError:
-            return False, b""
+        except EvmError as e:
+            # stub for future handling of programs that are actually expected to revert
+            raise
 
     def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:
         """Check equivalence between unoptimized and optimized execution."""
@@ -564,21 +628,49 @@ def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None:
             ac = IRAnalysesCache(fn)
             MakeSSA(ac, fn).run_pass()
             AssignElimination(ac, fn).run_pass()
-        hp.note("UNOPTIMIZED: " + str(ctx))
+            fn.freshen_varnames()
 
         opt_ctx = self.run_passes(ctx)
+        for fn in opt_ctx.functions.values():
+            fn.freshen_varnames()
+
+        try:
+            assert_ctx_eq(ctx, opt_ctx)
+        except AssertionError as e:
+            equals = False
+            msg = e.args[0]
+        else:
+            equals = True
+
+        if equals:
+            hp.note("No optimization done")
+            return
+        hp.note("UNOPTIMIZED: " + str(ctx))
         hp.note("OPTIMIZED: " + str(opt_ctx))
+        hp.note("optimizations: " + str(msg))
 
         bytecode1 = self.compile_to_bytecode(ctx)
         bytecode2 = self.compile_to_bytecode(opt_ctx)
 
+        hp.note(f"MSG CALL {calldata.hex()}")
+
         succ1, out1 = self.execute_bytecode(bytecode1, calldata, env)
         succ2, out2 = self.execute_bytecode(bytecode2, calldata, env)
 
+        if not succ1 or not succ2:
+            hp.note("reverted")
+        else:
+            hp.note(f"OUT {out1.hex()}")
+
         assert succ1 == succ2, (succ1, out1, succ2, out2)
         assert out1 == out2, (succ1, out1, succ2, out2)
 
 
+# ============================================================================
+# Test Helpers
+# ============================================================================
+
+
 @st.composite
 def venom_with_calldata(draw):
     """Generate Venom IR context with matching calldata."""
@@ -587,34 +679,19 @@ def venom_with_calldata(draw):
     return ctx, calldata
 
 
-# Test with memory-related passes
+# ============================================================================
+# Property-Based Tests
+# ============================================================================
+
+
 @pytest.mark.fuzzing
-# @pytest.mark.parametrize(
-#    "pass_list",
-#    [
-#        # Test individual memory passes
-#        [MemMergePass],
-#        [LoadElimination],
-#        [DeadStoreElimination],
-#        # Test combinations
-#        [LoadElimination, DeadStoreElimination],
-#        [DeadStoreElimination, LoadElimination],
-#        [LoadElimination, MemMergePass],
-#    ],
-# )
 @hp.given(venom_data=venom_with_calldata())
 @hp.settings(
-    max_examples=50,
+    max_examples=1000,
     suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow),
     deadline=None,
-    phases=(
-        hp.Phase.explicit,
-        hp.Phase.reuse,
-        hp.Phase.generate,
-        hp.Phase.target,
-        # Phase.shrink,  # can force long waiting for examples
-    ),
-    # verbosity=hp.Verbosity.debug,
+    phases=(hp.Phase.explicit, hp.Phase.reuse, hp.Phase.generate, hp.Phase.target),
+    verbosity=hp.Verbosity.verbose,
 )
 def test_memory_passes_fuzzing(venom_data, env):
     """
@@ -625,17 +702,15 @@ def test_memory_passes_fuzzing(venom_data, env):
     pass_list = [MemMergePass]
     ctx, calldata = venom_data
 
-    hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}")
-
-    func = list(ctx.functions.values())[0]
-    hp.note(f"Generated function with {func.num_basic_blocks} basic blocks")
-    hp.note(f"Calldata size: {len(calldata)} bytes")
-    hp.note(str(ctx))
-
     checker = MemoryFuzzChecker(pass_list)
     checker.check_equivalence(ctx, calldata, env)
 
 
+# ============================================================================
+# Manual Testing
+# ============================================================================
+
+
 def generate_sample_ir() -> IRContext:
     """Generate a sample IR for manual inspection."""
     ctx, _ = venom_function_with_memory_ops().example()
@@ -644,10 +719,6 @@ def generate_sample_ir() -> IRContext:
 
 if __name__ == "__main__":
     ctx = generate_sample_ir()
-
-    # func = list(ctx.functions.values())[0]
-    # print(func)
-
     checker = MemoryFuzzChecker([MemMergePass])
     checker.run_passes(ctx)
     print(ctx)