From 91c9035145e23e33585fd887b175cb4985f71733 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 14:29:33 +0200 Subject: [PATCH 01/24] draft - memory fuzzer for venom --- tests/functional/venom/test_memory_fuzzer.py | 434 +++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 tests/functional/venom/test_memory_fuzzer.py diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py new file mode 100644 index 0000000000..bde2174c8c --- /dev/null +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -0,0 +1,434 @@ +""" +Memory fuzzer for Venom IR. + +This fuzzer generates complex control flow with memory instructions to test +memory optimization passes. It uses the IRBasicBlock API directly and +can be plugged with any Venom passes. +""" + +import pytest +import hypothesis as hp +import hypothesis.strategies as st +from typing import List, Optional, Set + +from tests.venom_utils import PrePostChecker +from tests.hevm import hevm_check_venom_ctx +from vyper.venom.analysis import IRAnalysesCache +from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRVariable, IRLiteral, IRLabel +from vyper.venom.context import IRContext +from vyper.venom.function import IRFunction +from vyper.venom.passes.base_pass import IRPass + +# Memory operations that can be fuzzed +MEMORY_OPS = ["mload", "mstore", "mcopy"] + +# Precompile addresses for fence operations that generate real data +PRECOMPILES = { + 0x1: "ecrecover", # Returns 32 bytes + 0x2: "sha256", # Returns 32 bytes + 0x3: "ripemd160", # Returns 32 bytes + 0x4: "identity", # Returns input data + 0x5: "modexp", # Returns variable length + 0x6: "ecadd", # Returns 64 bytes + 0x7: "ecmul", # Returns 64 bytes + 0x8: "ecpairing", # Returns 32 bytes + 0x9: "blake2f", # Returns 64 bytes +} + +# Constants for fuzzing +MAX_MEMORY_SIZE = 4096 # Limit memory to 4096 bytes +MAX_BASIC_BLOCKS = 8 +MAX_INSTRUCTIONS_PER_BLOCK = 8 +MAX_VARIABLES = 20 + + +class MemoryFuzzer: + """Generates random Venom IR with memory operations using IRBasicBlock API.""" + + def __init__(self, seed_memory: bool = True, allow_params: bool = True): + self.seed_memory = seed_memory + self.allow_params = allow_params + self.ctx = IRContext() + self.function = None + self.variable_counter = 0 + self.bb_counter = 0 + self.available_vars = [] # Variables available for use + + def get_next_variable(self) -> IRVariable: + """Generate a new unique variable.""" + self.variable_counter += 1 + var = IRVariable(f"v{self.variable_counter}") + self.available_vars.append(var) + return var + + def get_next_bb_label(self) -> IRLabel: + """Generate a new unique basic block label.""" + self.bb_counter += 1 + return IRLabel(f"bb{self.bb_counter}") + + def get_random_variable(self, draw) -> IRVariable: + """Get a random available variable or create a new one.""" + if self.available_vars and draw(st.booleans()): + return draw(st.sampled_from(self.available_vars)) + else: + return self.get_next_variable() + + def get_memory_address(self, draw) -> IRVariable | IRLiteral: + """Get a memory address (either variable or aligned literal).""" + if self.available_vars and draw(st.booleans()): + return draw(st.sampled_from(self.available_vars)) + else: + # Generate aligned memory addresses (multiples of 32) + addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32)) & ~31 + return IRLiteral(addr) + + +@st.composite +def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict: + """Generate a complex control flow graph structure.""" + num_blocks = draw(st.integers(min_value=2, max_value=max_blocks)) + + # Create adjacency list representation + # Block 0 is always the entry, highest numbered block is always the exit + edges = {} + + for i in range(num_blocks): + edges[i] = [] + + # Ensure connectivity: each block (except exit) has at least one outgoing edge + for i in range(num_blocks - 1): + # Add at least one outgoing edge to ensure no dead blocks + if i == num_blocks - 2: + # Second-to-last block must connect to exit + edges[i].append(num_blocks - 1) + else: + # Can connect to any later block + target = draw(st.integers(min_value=i + 1, max_value=num_blocks - 1)) + edges[i].append(target) + + # Add some additional random edges for complexity + for i in range(num_blocks - 1): + # Chance to add more outgoing edges + if draw(st.booleans()): + # Don't create too many edges + max_additional = min(2, num_blocks - i - 2) + if max_additional > 0: + num_additional = draw(st.integers(min_value=0, max_value=max_additional)) + for _ in range(num_additional): + # Choose a target we're not already connected to + possible_targets = [j for j in range(i + 1, num_blocks) if j not in edges[i]] + if possible_targets: + target = draw(st.sampled_from(possible_targets)) + edges[i].append(target) + + return {"num_blocks": num_blocks, "edges": edges} + + +@st.composite +def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: + """Generate and append a memory instruction to current basic block.""" + op = draw(st.sampled_from(MEMORY_OPS)) + bb = fuzzer.current_bb + + if op == "mload": + # %result = mload %addr + addr = fuzzer.get_memory_address(draw) + result_var = bb.append_instruction("mload", addr) + + elif op == "mstore": + # mstore %value, %addr + value = fuzzer.get_random_variable(draw) if fuzzer.available_vars else IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1))) + addr = fuzzer.get_memory_address(draw) + bb.append_instruction("mstore", value, addr) + + elif op == "mcopy": + # mcopy %dest, %src, %length + dest = fuzzer.get_memory_address(draw) + src = fuzzer.get_memory_address(draw) + length = IRLiteral(32) # Copy 32 bytes + bb.append_instruction("mcopy", dest, src, length) + + +@st.composite +def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: + """Generate a call to a precompile that produces real output data.""" + bb = fuzzer.current_bb + + # Choose a precompile + precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys()))) + precompile_name = PRECOMPILES[precompile_addr] + + # Set up input data in memory + input_offset = fuzzer.get_memory_address(draw) + output_offset = fuzzer.get_memory_address(draw) + + if precompile_name == "identity": + # Identity precompile - copies input to output + input_size = IRLiteral(32) + output_size = IRLiteral(32) + elif precompile_name == "sha256": + # SHA256 - takes any input, outputs 32 bytes + input_size = IRLiteral(64) # Use 64 bytes input + output_size = IRLiteral(32) + elif precompile_name == "blake2f": + # Blake2f - outputs 64 bytes + input_size = IRLiteral(213) # Blake2f requires 213 bytes input + output_size = IRLiteral(64) + elif precompile_name in ["ecadd", "ecmul"]: + # EC operations - specific input/output sizes + input_size = IRLiteral(96) # EC point operations + output_size = IRLiteral(64) + else: + # Default case + input_size = IRLiteral(32) + output_size = IRLiteral(32) + + # Call the precompile + gas = IRLiteral(100000) # Plenty of gas + addr = IRLiteral(precompile_addr) + value = IRLiteral(0) + + result_var = bb.append_instruction("staticcall", gas, addr, input_offset, input_size, output_offset, output_size) + + +@st.composite +def seed_memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: + """Generate an instruction that seeds memory with data.""" + bb = fuzzer.current_bb + + if fuzzer.allow_params: + # Use calldataload to get "random" data from parameters + offset = IRLiteral(draw(st.integers(min_value=0, max_value=256, step=32))) + data_var = bb.append_instruction("calldataload", offset) + + # Store it in memory + mem_addr = fuzzer.get_memory_address(draw) + bb.append_instruction("mstore", data_var, mem_addr) + else: + # Just store a literal value + value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1))) + mem_addr = fuzzer.get_memory_address(draw) + bb.append_instruction("mstore", value, mem_addr) + + +@st.composite +def basic_block_instructions(draw, fuzzer: MemoryFuzzer, is_entry: bool = False) -> None: + """Generate instructions for a basic block.""" + + # For entry block, seed some memory first + if is_entry and fuzzer.seed_memory: + num_seeds = draw(st.integers(min_value=1, max_value=3)) + for _ in range(num_seeds): + draw(seed_memory_instruction(fuzzer)) + + # Generate main instructions + num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK)) + + for _ in range(num_instructions): + # Choose instruction type + inst_type = draw(st.sampled_from(["memory", "precompile", "seed"])) + + if inst_type == "memory": + draw(memory_instruction(fuzzer)) + elif inst_type == "precompile": + draw(precompile_call(fuzzer)) + elif inst_type == "seed": + draw(seed_memory_instruction(fuzzer)) + + +@st.composite +def venom_function_with_memory_ops(draw) -> IRContext: + """Generate a complete Venom IR function using IRBasicBlock API.""" + + fuzzer = MemoryFuzzer(seed_memory=True, allow_params=True) + + # Create function + func_name = IRLabel("_fuzz_function", is_symbol=True) + fuzzer.function = IRFunction(func_name, fuzzer.ctx) + fuzzer.ctx.functions[func_name] = fuzzer.function + fuzzer.ctx.entry_function = fuzzer.function + + # Generate control flow structure + cfg = draw(control_flow_graph()) + num_blocks = cfg["num_blocks"] + edges = cfg["edges"] + + # Create all basic blocks first + basic_blocks = [] + for i in range(num_blocks): + if i == 0: + label = IRLabel("entry") + else: + label = fuzzer.get_next_bb_label() + + bb = IRBasicBlock(label, fuzzer.function) + fuzzer.function._basic_block_dict[label.value] = bb + basic_blocks.append(bb) + + # Set entry block + fuzzer.function.entry = basic_blocks[0] + + # Generate instructions for each block + for i, bb in enumerate(basic_blocks): + fuzzer.current_bb = bb + + # Generate block content + is_entry = (i == 0) + draw(basic_block_instructions(fuzzer, is_entry=is_entry)) + + # Add terminator instruction + outgoing_edges = edges[i] + + if i == num_blocks - 1: + # Exit block - return memory contents + bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0)) + elif len(outgoing_edges) == 1: + # Single outgoing edge - unconditional jump + target_bb = basic_blocks[outgoing_edges[0]] + bb.append_instruction("jmp", target_bb.label) + elif len(outgoing_edges) == 2: + # Two outgoing edges - conditional jump + # Create condition based on memory contents or available variable + if fuzzer.available_vars: + cond_var = draw(st.sampled_from(fuzzer.available_vars)) + else: + # Load something from memory as condition + cond_var = bb.append_instruction("mload", IRLiteral(0)) + + target1_bb = basic_blocks[outgoing_edges[0]] + target2_bb = basic_blocks[outgoing_edges[1]] + bb.append_instruction("jnz", target1_bb.label, target2_bb.label, cond_var) + else: + # Multiple edges - use djmp (dynamic jump table) + if fuzzer.available_vars: + selector_var = draw(st.sampled_from(fuzzer.available_vars)) + else: + selector_var = bb.append_instruction("mload", IRLiteral(0)) + + # Create jump table + target_labels = [basic_blocks[edge].label for edge in outgoing_edges] + bb.append_instruction("djmp", selector_var, *target_labels) + + return fuzzer.ctx + + +class MemoryFuzzChecker: + """A pluggable checker for memory passes using fuzzing.""" + + def __init__(self, passes: List[type], post_passes: List[type] = None): + self.passes = passes + self.post_passes = post_passes or [] + + def check_memory_equivalence(self, ctx: IRContext) -> bool: + """ + Check that memory passes preserve semantics by comparing execution. + + Returns True if optimized and unoptimized versions are equivalent. + """ + try: + # Deep copy the context for optimization + import copy + unoptimized_ctx = copy.deepcopy(ctx) + optimized_ctx = copy.deepcopy(ctx) + + # Apply passes to optimized version + for fn in optimized_ctx.functions.values(): + ac = IRAnalysesCache(fn) + for pass_class in self.passes: + pass_obj = pass_class(ac, fn) + pass_obj.run_pass() + + # Apply post passes + for pass_class in self.post_passes: + pass_obj = pass_class(ac, fn) + pass_obj.run_pass() + + # Use hevm to check equivalence if available + try: + hevm_check_venom_ctx(unoptimized_ctx, optimized_ctx) + return True + except Exception as e: + # If hevm fails, we assume the optimization broke semantics + hp.note(f"HEVM equivalence check failed: {e}") + return False + + except Exception as e: + # If optimization fails, skip this test case + hp.note(f"Optimization failed: {e}") + hp.assume(False) + return False + + +# Test with memory-related passes +@pytest.mark.fuzzing +@pytest.mark.parametrize("pass_list", [ + # Test individual memory passes + [__import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass], + [__import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass], + + # Test combinations + [ + __import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass, + __import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass, + ], +]) +@hp.given(ctx=venom_function_with_memory_ops()) +@hp.settings( + max_examples=100, + suppress_health_check=( + hp.HealthCheck.data_too_large, + hp.HealthCheck.too_slow, + hp.HealthCheck.filter_too_much, + ), + deadline=None, +) +def test_memory_passes_fuzzing(pass_list, ctx): + """ + Property-based test for memory optimization passes. + + Tests that memory passes preserve semantics by comparing execution + between optimized and unoptimized versions. + """ + hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") + + # Log the generated IR for debugging + if hasattr(ctx, 'functions') and ctx.functions: + func = list(ctx.functions.values())[0] + hp.note(f"Generated function with {len(func._basic_block_dict)} basic blocks") + for bb_name, bb in func._basic_block_dict.items(): + hp.note(f"Block {bb_name}: {len(bb.instructions)} instructions") + + checker = MemoryFuzzChecker(pass_list) + + # The property we're testing: optimization should preserve semantics + assert checker.check_memory_equivalence(ctx), "Memory optimization broke semantics" + + +# Utility function for manual testing +def generate_sample_ir() -> IRContext: + """Generate a sample IR for manual inspection.""" + import random + random.seed(42) + + # Create a hypothesis example + ctx = venom_function_with_memory_ops().example() + return ctx + + +if __name__ == "__main__": + # Example usage + ctx = generate_sample_ir() + + if ctx and ctx.functions: + func = list(ctx.functions.values())[0] + print(f"Generated function with {len(func._basic_block_dict)} basic blocks:") + print(func) + + # Test with a simple pass + try: + from vyper.venom.passes.load_elimination import LoadEliminationPass + checker = MemoryFuzzChecker([LoadEliminationPass]) + result = checker.check_memory_equivalence(ctx) + print(f"\nEquivalence check result: {result}") + except ImportError: + print("Could not import LoadEliminationPass for testing") \ No newline at end of file From ebdac4df8fc49eee723463b48f5697f707345955 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 15:04:04 +0200 Subject: [PATCH 02/24] improvements / tuning --- tests/functional/venom/test_memory_fuzzer.py | 290 +++++++++++-------- 1 file changed, 171 insertions(+), 119 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index bde2174c8c..eda506eb3b 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -6,15 +6,16 @@ can be plugged with any Venom passes. """ -import pytest +from typing import List, Optional, Set + import hypothesis as hp import hypothesis.strategies as st -from typing import List, Optional, Set +import pytest -from tests.venom_utils import PrePostChecker from tests.hevm import hevm_check_venom_ctx +from tests.venom_utils import PrePostChecker from vyper.venom.analysis import IRAnalysesCache -from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRVariable, IRLiteral, IRLabel +from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.passes.base_pass import IRPass @@ -24,15 +25,15 @@ # Precompile addresses for fence operations that generate real data PRECOMPILES = { - 0x1: "ecrecover", # Returns 32 bytes - 0x2: "sha256", # Returns 32 bytes - 0x3: "ripemd160", # Returns 32 bytes - 0x4: "identity", # Returns input data - 0x5: "modexp", # Returns variable length - 0x6: "ecadd", # Returns 64 bytes - 0x7: "ecmul", # Returns 64 bytes - 0x8: "ecpairing", # Returns 32 bytes - 0x9: "blake2f", # Returns 64 bytes + 0x1: "ecrecover", # Returns 32 bytes + 0x2: "sha256", # Returns 32 bytes + 0x3: "ripemd160", # Returns 32 bytes + 0x4: "identity", # Returns input data + 0x5: "modexp", # Returns variable length + 0x6: "ecadd", # Returns 64 bytes + 0x7: "ecmul", # Returns 64 bytes + 0x8: "ecpairing", # Returns 32 bytes + 0x9: "blake2f", # Returns 64 bytes } # Constants for fuzzing @@ -44,57 +45,91 @@ class MemoryFuzzer: """Generates random Venom IR with memory operations using IRBasicBlock API.""" - - def __init__(self, seed_memory: bool = True, allow_params: bool = True): - self.seed_memory = seed_memory - self.allow_params = allow_params + + def __init__(self): self.ctx = IRContext() self.function = None self.variable_counter = 0 self.bb_counter = 0 + self.calldata_offset = 0 self.available_vars = [] # Variables available for use - + self.allocated_memory_slots = set() # Track memory addresses that have been used + def get_next_variable(self) -> IRVariable: """Generate a new unique variable.""" self.variable_counter += 1 var = IRVariable(f"v{self.variable_counter}") self.available_vars.append(var) return var - + + def ensure_all_vars_have_values(self) -> None: + """Ensure all available variables have values by using calldataload for unassigned ones.""" + # Find all variables that are outputs of instructions + assigned_vars = set() + for bb in self.function._basic_block_dict.values(): + for inst in bb.instructions: + if inst.output: + assigned_vars.add(inst.output) + + # For variables that don't have values, add calldataload at the beginning + entry_bb = self.function.entry + unassigned_vars = [var for var in self.available_vars if var not in assigned_vars] + + for i, var in enumerate(unassigned_vars): + # Insert calldataload at the beginning of the entry block + inst = IRInstruction("calldataload", [IRLiteral(self.calldata_offset)], var) + entry_bb.insert_instruction(inst, index=i) + self.calldata_offset += 32 + def get_next_bb_label(self) -> IRLabel: - """Generate a new unique basic block label.""" + """Generate a new unique basic block label.""" self.bb_counter += 1 return IRLabel(f"bb{self.bb_counter}") - + def get_random_variable(self, draw) -> IRVariable: """Get a random available variable or create a new one.""" if self.available_vars and draw(st.booleans()): return draw(st.sampled_from(self.available_vars)) else: return self.get_next_variable() - + def get_memory_address(self, draw) -> IRVariable | IRLiteral: - """Get a memory address (either variable or aligned literal).""" + """Get a memory address, biased towards interesting optimizer-relevant locations.""" + # 50% chance to use existing variable if self.available_vars and draw(st.booleans()): return draw(st.sampled_from(self.available_vars)) + + # Generate literal address + if self.allocated_memory_slots and draw(st.booleans()): + # Bias towards addresses near existing allocations + base_addr = draw(st.sampled_from(list(self.allocated_memory_slots))) + + # Random offset biased towards edges (0 and 32 are most common) + offset = draw(st.integers(min_value=-32, max_value=32)) + if draw(st.booleans()): # 50% chance to snap to edge + offset = 0 if abs(offset) < 16 else (32 if offset > 0 else -32) + + addr = max(0, min(MAX_MEMORY_SIZE - 32, base_addr + offset)) else: - # Generate aligned memory addresses (multiples of 32) - addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32)) & ~31 - return IRLiteral(addr) + # Random address anywhere in memory + addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32)) + self.allocated_memory_slots.add(addr) + return IRLiteral(addr) -@st.composite + +@st.composite def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict: """Generate a complex control flow graph structure.""" num_blocks = draw(st.integers(min_value=2, max_value=max_blocks)) - + # Create adjacency list representation # Block 0 is always the entry, highest numbered block is always the exit edges = {} - + for i in range(num_blocks): edges[i] = [] - + # Ensure connectivity: each block (except exit) has at least one outgoing edge for i in range(num_blocks - 1): # Add at least one outgoing edge to ensure no dead blocks @@ -102,10 +137,10 @@ def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict: # Second-to-last block must connect to exit edges[i].append(num_blocks - 1) else: - # Can connect to any later block + # Can connect to any later block target = draw(st.integers(min_value=i + 1, max_value=num_blocks - 1)) edges[i].append(target) - + # Add some additional random edges for complexity for i in range(num_blocks - 1): # Chance to add more outgoing edges @@ -120,7 +155,7 @@ def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict: if possible_targets: target = draw(st.sampled_from(possible_targets)) edges[i].append(target) - + return {"num_blocks": num_blocks, "edges": edges} @@ -129,39 +164,56 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: """Generate and append a memory instruction to current basic block.""" op = draw(st.sampled_from(MEMORY_OPS)) bb = fuzzer.current_bb - + if op == "mload": # %result = mload %addr addr = fuzzer.get_memory_address(draw) result_var = bb.append_instruction("mload", addr) - + elif op == "mstore": # mstore %value, %addr - value = fuzzer.get_random_variable(draw) if fuzzer.available_vars else IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1))) + # Random choice between variable and literal for value + if fuzzer.available_vars and draw(st.booleans()): + value = draw(st.sampled_from(fuzzer.available_vars)) + else: + value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256 - 1))) addr = fuzzer.get_memory_address(draw) bb.append_instruction("mstore", value, addr) - + elif op == "mcopy": # mcopy %dest, %src, %length dest = fuzzer.get_memory_address(draw) src = fuzzer.get_memory_address(draw) - length = IRLiteral(32) # Copy 32 bytes - bb.append_instruction("mcopy", dest, src, length) + + # Bias towards small lengths (more interesting for optimizers) + if draw(st.booleans()): + # Small lengths (1-96 bytes, biased towards 32-byte multiples) + if draw(st.booleans()): + length = draw( + st.sampled_from([1, 2, 4, 8, 16, 20, 24, 28, 31, 32, 33, 36, 40, 48, 64, 96]) + ) + else: + length = draw(st.integers(min_value=1, max_value=96)) + else: + # Larger lengths (up to 1KB) + length = draw(st.integers(min_value=97, max_value=1024)) + + bb.append_instruction("mcopy", dest, src, IRLiteral(length)) @st.composite def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: """Generate a call to a precompile that produces real output data.""" bb = fuzzer.current_bb - + # Choose a precompile precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys()))) precompile_name = PRECOMPILES[precompile_addr] - + # Set up input data in memory input_offset = fuzzer.get_memory_address(draw) output_offset = fuzzer.get_memory_address(draw) - + if precompile_name == "identity": # Identity precompile - copies input to output input_size = IRLiteral(32) @@ -182,77 +234,57 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: # Default case input_size = IRLiteral(32) output_size = IRLiteral(32) - + # Call the precompile - gas = IRLiteral(100000) # Plenty of gas + gas = bb.append_instruction("gas") # Use all available gas addr = IRLiteral(precompile_addr) value = IRLiteral(0) - - result_var = bb.append_instruction("staticcall", gas, addr, input_offset, input_size, output_offset, output_size) - -@st.composite -def seed_memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: - """Generate an instruction that seeds memory with data.""" - bb = fuzzer.current_bb - - if fuzzer.allow_params: - # Use calldataload to get "random" data from parameters - offset = IRLiteral(draw(st.integers(min_value=0, max_value=256, step=32))) - data_var = bb.append_instruction("calldataload", offset) - - # Store it in memory - mem_addr = fuzzer.get_memory_address(draw) - bb.append_instruction("mstore", data_var, mem_addr) - else: - # Just store a literal value - value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256-1))) - mem_addr = fuzzer.get_memory_address(draw) - bb.append_instruction("mstore", value, mem_addr) + result_var = bb.append_instruction( + "staticcall", gas, addr, input_offset, input_size, output_offset, output_size + ) @st.composite def basic_block_instructions(draw, fuzzer: MemoryFuzzer, is_entry: bool = False) -> None: """Generate instructions for a basic block.""" - - # For entry block, seed some memory first - if is_entry and fuzzer.seed_memory: - num_seeds = draw(st.integers(min_value=1, max_value=3)) - for _ in range(num_seeds): - draw(seed_memory_instruction(fuzzer)) - + + # For entry block, seed memory first + if is_entry: + bb.append_instruction( + "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE) + ) + # Generate main instructions num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK)) - + for _ in range(num_instructions): # Choose instruction type - inst_type = draw(st.sampled_from(["memory", "precompile", "seed"])) - + inst_type = draw(st.sampled_from(["memory", "precompile"])) + if inst_type == "memory": draw(memory_instruction(fuzzer)) - elif inst_type == "precompile": + elif inst_type == "precompile": draw(precompile_call(fuzzer)) - elif inst_type == "seed": - draw(seed_memory_instruction(fuzzer)) @st.composite def venom_function_with_memory_ops(draw) -> IRContext: """Generate a complete Venom IR function using IRBasicBlock API.""" - - fuzzer = MemoryFuzzer(seed_memory=True, allow_params=True) - + + fuzzer = MemoryFuzzer() + # Create function func_name = IRLabel("_fuzz_function", is_symbol=True) fuzzer.function = IRFunction(func_name, fuzzer.ctx) fuzzer.ctx.functions[func_name] = fuzzer.function fuzzer.ctx.entry_function = fuzzer.function - + # Generate control flow structure cfg = draw(control_flow_graph()) num_blocks = cfg["num_blocks"] edges = cfg["edges"] - + # Create all basic blocks first basic_blocks = [] for i in range(num_blocks): @@ -260,25 +292,25 @@ def venom_function_with_memory_ops(draw) -> IRContext: label = IRLabel("entry") else: label = fuzzer.get_next_bb_label() - + bb = IRBasicBlock(label, fuzzer.function) fuzzer.function._basic_block_dict[label.value] = bb basic_blocks.append(bb) - + # Set entry block fuzzer.function.entry = basic_blocks[0] - + # Generate instructions for each block for i, bb in enumerate(basic_blocks): fuzzer.current_bb = bb - + # Generate block content - is_entry = (i == 0) + is_entry = i == 0 draw(basic_block_instructions(fuzzer, is_entry=is_entry)) - + # Add terminator instruction outgoing_edges = edges[i] - + if i == num_blocks - 1: # Exit block - return memory contents bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0)) @@ -294,7 +326,7 @@ def venom_function_with_memory_ops(draw) -> IRContext: else: # Load something from memory as condition cond_var = bb.append_instruction("mload", IRLiteral(0)) - + target1_bb = basic_blocks[outgoing_edges[0]] target2_bb = basic_blocks[outgoing_edges[1]] bb.append_instruction("jnz", target1_bb.label, target2_bb.label, cond_var) @@ -304,45 +336,49 @@ def venom_function_with_memory_ops(draw) -> IRContext: selector_var = draw(st.sampled_from(fuzzer.available_vars)) else: selector_var = bb.append_instruction("mload", IRLiteral(0)) - + # Create jump table target_labels = [basic_blocks[edge].label for edge in outgoing_edges] bb.append_instruction("djmp", selector_var, *target_labels) - + + # Ensure all variables have values before returning + fuzzer.ensure_all_vars_have_values() + return fuzzer.ctx class MemoryFuzzChecker: """A pluggable checker for memory passes using fuzzing.""" - + def __init__(self, passes: List[type], post_passes: List[type] = None): self.passes = passes self.post_passes = post_passes or [] - + def check_memory_equivalence(self, ctx: IRContext) -> bool: """ Check that memory passes preserve semantics by comparing execution. - + Returns True if optimized and unoptimized versions are equivalent. """ try: # Deep copy the context for optimization import copy + unoptimized_ctx = copy.deepcopy(ctx) optimized_ctx = copy.deepcopy(ctx) - + # Apply passes to optimized version for fn in optimized_ctx.functions.values(): ac = IRAnalysesCache(fn) for pass_class in self.passes: pass_obj = pass_class(ac, fn) pass_obj.run_pass() - + # Apply post passes for pass_class in self.post_passes: pass_obj = pass_class(ac, fn) pass_obj.run_pass() - + # Use hevm to check equivalence if available try: hevm_check_venom_ctx(unoptimized_ctx, optimized_ctx) @@ -351,7 +387,7 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool: # If hevm fails, we assume the optimization broke semantics hp.note(f"HEVM equivalence check failed: {e}") return False - + except Exception as e: # If optimization fails, skip this test case hp.note(f"Optimization failed: {e}") @@ -360,18 +396,32 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool: # Test with memory-related passes -@pytest.mark.fuzzing -@pytest.mark.parametrize("pass_list", [ - # Test individual memory passes - [__import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass], - [__import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass], - - # Test combinations +@pytest.mark.fuzzing +@pytest.mark.parametrize( + "pass_list", [ - __import__("vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"]).LoadEliminationPass, - __import__("vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"]).DeadStoreEliminationPass, + # Test individual memory passes + [ + __import__( + "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"] + ).LoadEliminationPass + ], + [ + __import__( + "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"] + ).DeadStoreEliminationPass + ], + # Test combinations + [ + __import__( + "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"] + ).LoadEliminationPass, + __import__( + "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"] + ).DeadStoreEliminationPass, + ], ], -]) +) @hp.given(ctx=venom_function_with_memory_ops()) @hp.settings( max_examples=100, @@ -385,21 +435,21 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool: def test_memory_passes_fuzzing(pass_list, ctx): """ Property-based test for memory optimization passes. - + Tests that memory passes preserve semantics by comparing execution between optimized and unoptimized versions. """ hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") - + # Log the generated IR for debugging - if hasattr(ctx, 'functions') and ctx.functions: + if hasattr(ctx, "functions") and ctx.functions: func = list(ctx.functions.values())[0] hp.note(f"Generated function with {len(func._basic_block_dict)} basic blocks") for bb_name, bb in func._basic_block_dict.items(): hp.note(f"Block {bb_name}: {len(bb.instructions)} instructions") - + checker = MemoryFuzzChecker(pass_list) - + # The property we're testing: optimization should preserve semantics assert checker.check_memory_equivalence(ctx), "Memory optimization broke semantics" @@ -408,8 +458,9 @@ def test_memory_passes_fuzzing(pass_list, ctx): def generate_sample_ir() -> IRContext: """Generate a sample IR for manual inspection.""" import random + random.seed(42) - + # Create a hypothesis example ctx = venom_function_with_memory_ops().example() return ctx @@ -418,17 +469,18 @@ def generate_sample_ir() -> IRContext: if __name__ == "__main__": # Example usage ctx = generate_sample_ir() - + if ctx and ctx.functions: func = list(ctx.functions.values())[0] print(f"Generated function with {len(func._basic_block_dict)} basic blocks:") print(func) - + # Test with a simple pass try: from vyper.venom.passes.load_elimination import LoadEliminationPass + checker = MemoryFuzzChecker([LoadEliminationPass]) result = checker.check_memory_equivalence(ctx) print(f"\nEquivalence check result: {result}") except ImportError: - print("Could not import LoadEliminationPass for testing") \ No newline at end of file + print("Could not import LoadEliminationPass for testing") From 2c5871b26134ffce3d33b590e9083d607a962439 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 15:11:30 +0200 Subject: [PATCH 03/24] remove dead code --- tests/functional/venom/test_memory_fuzzer.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index eda506eb3b..2982b41442 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -13,12 +13,10 @@ import pytest from tests.hevm import hevm_check_venom_ctx -from tests.venom_utils import PrePostChecker from vyper.venom.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction -from vyper.venom.passes.base_pass import IRPass # Memory operations that can be fuzzed MEMORY_OPS = ["mload", "mstore", "mcopy"] @@ -40,7 +38,6 @@ MAX_MEMORY_SIZE = 4096 # Limit memory to 4096 bytes MAX_BASIC_BLOCKS = 8 MAX_INSTRUCTIONS_PER_BLOCK = 8 -MAX_VARIABLES = 20 class MemoryFuzzer: @@ -86,13 +83,6 @@ def get_next_bb_label(self) -> IRLabel: self.bb_counter += 1 return IRLabel(f"bb{self.bb_counter}") - def get_random_variable(self, draw) -> IRVariable: - """Get a random available variable or create a new one.""" - if self.available_vars and draw(st.booleans()): - return draw(st.sampled_from(self.available_vars)) - else: - return self.get_next_variable() - def get_memory_address(self, draw) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" # 50% chance to use existing variable From fdea0553cea96b1e07867aecd28cf6f26e71a50a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 18:12:09 +0200 Subject: [PATCH 04/24] add some cleanup --- tests/functional/venom/test_memory_fuzzer.py | 354 +++++++++++-------- vyper/venom/context.py | 25 ++ 2 files changed, 239 insertions(+), 140 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 2982b41442..c06a4ad7c1 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -6,17 +6,17 @@ can be plugged with any Venom passes. """ -from typing import List, Optional, Set - import hypothesis as hp import hypothesis.strategies as st import pytest -from tests.hevm import hevm_check_venom_ctx from vyper.venom.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction +from vyper.venom.passes.dead_store_elimination import DeadStoreEliminationPass +from vyper.venom.passes.load_elimination import LoadEliminationPass +from vyper.venom.passes.memmerging import MemMergingPass # Memory operations that can be fuzzed MEMORY_OPS = ["mload", "mstore", "mcopy"] @@ -35,9 +35,10 @@ } # Constants for fuzzing -MAX_MEMORY_SIZE = 4096 # Limit memory to 4096 bytes +MAX_MEMORY_SIZE = 4096 # Limit for memory operations MAX_BASIC_BLOCKS = 8 MAX_INSTRUCTIONS_PER_BLOCK = 8 +MAX_LOOP_ITERATIONS = 12 # Maximum iterations before forced loop exit class MemoryFuzzer: @@ -48,7 +49,7 @@ def __init__(self): self.function = None self.variable_counter = 0 self.bb_counter = 0 - self.calldata_offset = 0 + self.calldata_offset = MAX_MEMORY_SIZE self.available_vars = [] # Variables available for use self.allocated_memory_slots = set() # Track memory addresses that have been used @@ -63,7 +64,7 @@ def ensure_all_vars_have_values(self) -> None: """Ensure all available variables have values by using calldataload for unassigned ones.""" # Find all variables that are outputs of instructions assigned_vars = set() - for bb in self.function._basic_block_dict.values(): + for bb in self.function.get_basic_blocks(): for inst in bb.instructions: if inst.output: assigned_vars.add(inst.output) @@ -108,47 +109,6 @@ def get_memory_address(self, draw) -> IRVariable | IRLiteral: return IRLiteral(addr) -@st.composite -def control_flow_graph(draw, max_blocks: int = MAX_BASIC_BLOCKS) -> dict: - """Generate a complex control flow graph structure.""" - num_blocks = draw(st.integers(min_value=2, max_value=max_blocks)) - - # Create adjacency list representation - # Block 0 is always the entry, highest numbered block is always the exit - edges = {} - - for i in range(num_blocks): - edges[i] = [] - - # Ensure connectivity: each block (except exit) has at least one outgoing edge - for i in range(num_blocks - 1): - # Add at least one outgoing edge to ensure no dead blocks - if i == num_blocks - 2: - # Second-to-last block must connect to exit - edges[i].append(num_blocks - 1) - else: - # Can connect to any later block - target = draw(st.integers(min_value=i + 1, max_value=num_blocks - 1)) - edges[i].append(target) - - # Add some additional random edges for complexity - for i in range(num_blocks - 1): - # Chance to add more outgoing edges - if draw(st.booleans()): - # Don't create too many edges - max_additional = min(2, num_blocks - i - 2) - if max_additional > 0: - num_additional = draw(st.integers(min_value=0, max_value=max_additional)) - for _ in range(num_additional): - # Choose a target we're not already connected to - possible_targets = [j for j in range(i + 1, num_blocks) if j not in edges[i]] - if possible_targets: - target = draw(st.sampled_from(possible_targets)) - edges[i].append(target) - - return {"num_blocks": num_blocks, "edges": edges} - - @st.composite def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: """Generate and append a memory instruction to current basic block.""" @@ -159,6 +119,7 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: # %result = mload %addr addr = fuzzer.get_memory_address(draw) result_var = bb.append_instruction("mload", addr) + fuzzer.available_vars.append(result_var) elif op == "mstore": # mstore %value, %addr @@ -191,6 +152,127 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: bb.append_instruction("mcopy", dest, src, IRLiteral(length)) +@st.composite +def control_flow_graph(draw, basic_blocks): + """ + Generate a control flow graph that ensures: + 1. All blocks are reachable from entry + 2. No infinite loops (all loops terminate within 12 iterations) + 3. Proper use of jump and branch instructions + """ + if len(basic_blocks) == 1: + # Single block case - must return + return {basic_blocks[0]: {"type": "return"}} + + cfg = {} + entry_block = basic_blocks[0] + + # Create a spanning tree to ensure all blocks are reachable + remaining_blocks = basic_blocks[1:] + reachable_blocks = [entry_block] + + # Build spanning tree connections + while remaining_blocks: + # Pick a random reachable block to connect from + source = draw(st.sampled_from(reachable_blocks)) + # Pick a random unreachable block to connect to + target = draw(st.sampled_from(remaining_blocks)) + + # Add the target to reachable blocks + reachable_blocks.append(target) + remaining_blocks.remove(target) + + # Decide if this connection should be a jump or branch + if draw(st.booleans()): + # Jump connection + cfg[source] = {"type": "jump", "target": target} + else: + # Branch connection - need two targets + other_target = draw(st.sampled_from(basic_blocks)) + cfg[source] = {"type": "branch", "target1": target, "target2": other_target} + + # Now add additional edges for more complex control flow + num_additional_edges = draw(st.integers(min_value=0, max_value=len(basic_blocks))) + loop_counter_addr = MAX_MEMORY_SIZE # Start of reserved memory for metadata + + for _ in range(num_additional_edges): + source = draw(st.sampled_from(basic_blocks)) + + # Skip if already has terminator + if source in cfg: + continue + + edge_type = draw(st.sampled_from(["jump", "branch"])) + + if edge_type == "jump": + target = draw(st.sampled_from(basic_blocks)) + + # Check if this creates a back edge (potential loop) + is_back_edge = basic_blocks.index(target) <= basic_blocks.index(source) + + if is_back_edge: + # For back edges, use a branch with loop counter instead of unconditional jump + cfg[source] = { + "type": "branch", + "target1": target, + "target2": draw(st.sampled_from(basic_blocks)), + "is_back_edge": True, + "counter_addr": loop_counter_addr, + } + loop_counter_addr += 32 # Next loop uses different memory location + else: + cfg[source] = {"type": "jump", "target": target} + + else: # branch + target1 = draw(st.sampled_from(basic_blocks)) + target2 = draw(st.sampled_from(basic_blocks)) + + # Check if either target creates a back edge + is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(source) + is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(source) + + cfg[source] = { + "type": "branch", + "target1": target1, + "target2": target2, + "is_back_edge": is_back_edge1 or is_back_edge2, + "counter_addr": loop_counter_addr if (is_back_edge1 or is_back_edge2) else None, + } + + if is_back_edge1 or is_back_edge2: + loop_counter_addr += 32 + + # Ensure at least one block can return (avoid infinite execution) + blocks_without_terminators = [bb for bb in basic_blocks if bb not in cfg] + if blocks_without_terminators: + # Make some blocks return + num_returns = max(1, len(blocks_without_terminators) // 3) + return_blocks = draw( + st.lists( + st.sampled_from(blocks_without_terminators), + min_size=num_returns, + max_size=num_returns, + unique=True, + ) + ) + for bb in return_blocks: + cfg[bb] = {"type": "return"} + + # Add random terminators to remaining blocks + remaining = [bb for bb in blocks_without_terminators if bb not in return_blocks] + for bb in remaining: + terminator_type = draw(st.sampled_from(["jump", "branch"])) + if terminator_type == "jump": + target = draw(st.sampled_from(basic_blocks)) + cfg[bb] = {"type": "jump", "target": target} + else: + target1 = draw(st.sampled_from(basic_blocks)) + target2 = draw(st.sampled_from(basic_blocks)) + cfg[bb] = {"type": "branch", "target1": target1, "target2": target2} + + return cfg + + @st.composite def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: """Generate a call to a precompile that produces real output data.""" @@ -228,23 +310,16 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: # Call the precompile gas = bb.append_instruction("gas") # Use all available gas addr = IRLiteral(precompile_addr) - value = IRLiteral(0) - result_var = bb.append_instruction( + bb.append_instruction( "staticcall", gas, addr, input_offset, input_size, output_offset, output_size ) @st.composite -def basic_block_instructions(draw, fuzzer: MemoryFuzzer, is_entry: bool = False) -> None: +def basic_block_instructions(draw, fuzzer: MemoryFuzzer) -> None: """Generate instructions for a basic block.""" - # For entry block, seed memory first - if is_entry: - bb.append_instruction( - "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE) - ) - # Generate main instructions num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK)) @@ -270,13 +345,10 @@ def venom_function_with_memory_ops(draw) -> IRContext: fuzzer.ctx.functions[func_name] = fuzzer.function fuzzer.ctx.entry_function = fuzzer.function - # Generate control flow structure - cfg = draw(control_flow_graph()) - num_blocks = cfg["num_blocks"] - edges = cfg["edges"] - - # Create all basic blocks first + # Generate blocks + num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS)) basic_blocks = [] + for i in range(num_blocks): if i == 0: label = IRLabel("entry") @@ -284,52 +356,80 @@ def venom_function_with_memory_ops(draw) -> IRContext: label = fuzzer.get_next_bb_label() bb = IRBasicBlock(label, fuzzer.function) - fuzzer.function._basic_block_dict[label.value] = bb + fuzzer.function.append_basic_block(bb) basic_blocks.append(bb) # Set entry block fuzzer.function.entry = basic_blocks[0] - # Generate instructions for each block + # Create a control flow graph that ensures reachability and loop termination + cfg = draw(control_flow_graph(basic_blocks)) + + # Initialize memory and loop counters at function entry + entry_block = basic_blocks[0] + entry_block.append_instruction( + "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE) + ) + + # Extract used counter addresses from CFG and initialize them + used_counter_addrs = set() + for terminator_info in cfg.values(): + if terminator_info.get("counter_addr") is not None: + addr = terminator_info["counter_addr"] + assert addr not in used_counter_addrs, f"Duplicate counter address {addr}" + used_counter_addrs.add(addr) + + for addr in used_counter_addrs: + entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) + + # Generate content for each block for i, bb in enumerate(basic_blocks): fuzzer.current_bb = bb # Generate block content - is_entry = i == 0 - draw(basic_block_instructions(fuzzer, is_entry=is_entry)) + draw(basic_block_instructions(fuzzer)) - # Add terminator instruction - outgoing_edges = edges[i] - - if i == num_blocks - 1: - # Exit block - return memory contents + # Add terminators based on the control flow graph + terminator_info = cfg[bb] + if terminator_info["type"] == "return": bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0)) - elif len(outgoing_edges) == 1: - # Single outgoing edge - unconditional jump - target_bb = basic_blocks[outgoing_edges[0]] - bb.append_instruction("jmp", target_bb.label) - elif len(outgoing_edges) == 2: - # Two outgoing edges - conditional jump - # Create condition based on memory contents or available variable + elif terminator_info["type"] == "jump": + target = terminator_info["target"] + bb.append_instruction("jmp", target.label) + elif terminator_info["type"] == "branch": + # Use existing variable or create condition if fuzzer.available_vars: cond_var = draw(st.sampled_from(fuzzer.available_vars)) else: - # Load something from memory as condition cond_var = bb.append_instruction("mload", IRLiteral(0)) - target1_bb = basic_blocks[outgoing_edges[0]] - target2_bb = basic_blocks[outgoing_edges[1]] - bb.append_instruction("jnz", target1_bb.label, target2_bb.label, cond_var) - else: - # Multiple edges - use djmp (dynamic jump table) - if fuzzer.available_vars: - selector_var = draw(st.sampled_from(fuzzer.available_vars)) + # Add loop counter check if this is a back edge + if terminator_info.get("is_back_edge", False): + loop_counter_addr = terminator_info["counter_addr"] + + # Load and increment counter + counter = bb.append_instruction("mload", IRLiteral(loop_counter_addr)) + incremented = bb.append_instruction("add", counter, IRLiteral(1)) + bb.append_instruction("mstore", incremented, IRLiteral(loop_counter_addr)) + + # Check if we should continue looping (counter < MAX_LOOP_ITERATIONS) + counter_lt_max = bb.append_instruction( + "lt", incremented, IRLiteral(MAX_LOOP_ITERATIONS) + ) + + # Normalize original condition to 0 or 1 + cond_normalized = bb.append_instruction("and", cond_var, IRLiteral(1)) + + # Continue loop only if: counter < MAX AND original condition is true + combined_cond = bb.append_instruction("and", counter_lt_max, cond_normalized) + cond_var = combined_cond else: - selector_var = bb.append_instruction("mload", IRLiteral(0)) + # Non-loop branches: just normalize condition to 0 or 1 + cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) - # Create jump table - target_labels = [basic_blocks[edge].label for edge in outgoing_edges] - bb.append_instruction("djmp", selector_var, *target_labels) + target1 = terminator_info["target1"] + target2 = terminator_info["target2"] + bb.append_instruction("jnz", target1.label, target2.label, cond_var) # Ensure all variables have values before returning fuzzer.ensure_all_vars_have_values() @@ -340,22 +440,20 @@ def venom_function_with_memory_ops(draw) -> IRContext: class MemoryFuzzChecker: """A pluggable checker for memory passes using fuzzing.""" - def __init__(self, passes: List[type], post_passes: List[type] = None): + def __init__(self, passes: list[type], post_passes: list[type] = None): self.passes = passes self.post_passes = post_passes or [] def check_memory_equivalence(self, ctx: IRContext) -> bool: """ - Check that memory passes preserve semantics by comparing execution. + Check that memory passes preserve semantics. - Returns True if optimized and unoptimized versions are equivalent. + For now, this just verifies that the passes run without errors. + TODO: Implement actual semantic equivalence checking. """ try: - # Deep copy the context for optimization - import copy - - unoptimized_ctx = copy.deepcopy(ctx) - optimized_ctx = copy.deepcopy(ctx) + # Copy the context for optimization + optimized_ctx = ctx.copy() # Apply passes to optimized version for fn in optimized_ctx.functions.values(): @@ -369,19 +467,12 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool: pass_obj = pass_class(ac, fn) pass_obj.run_pass() - # Use hevm to check equivalence if available - try: - hevm_check_venom_ctx(unoptimized_ctx, optimized_ctx) - return True - except Exception as e: - # If hevm fails, we assume the optimization broke semantics - hp.note(f"HEVM equivalence check failed: {e}") - return False + # If we get here, the passes ran successfully + return True except Exception as e: - # If optimization fails, skip this test case + # If optimization fails, the pass has a bug hp.note(f"Optimization failed: {e}") - hp.assume(False) return False @@ -391,25 +482,13 @@ def check_memory_equivalence(self, ctx: IRContext) -> bool: "pass_list", [ # Test individual memory passes - [ - __import__( - "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"] - ).LoadEliminationPass - ], - [ - __import__( - "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"] - ).DeadStoreEliminationPass - ], + [LoadEliminationPass], + [DeadStoreEliminationPass], + [MemMergingPass], # Test combinations - [ - __import__( - "vyper.venom.passes.load_elimination", fromlist=["LoadEliminationPass"] - ).LoadEliminationPass, - __import__( - "vyper.venom.passes.dead_store_elimination", fromlist=["DeadStoreEliminationPass"] - ).DeadStoreEliminationPass, - ], + [LoadEliminationPass, DeadStoreEliminationPass], + [DeadStoreEliminationPass, LoadEliminationPass], + [LoadEliminationPass, MemMergingPass], ], ) @hp.given(ctx=venom_function_with_memory_ops()) @@ -434,14 +513,14 @@ def test_memory_passes_fuzzing(pass_list, ctx): # Log the generated IR for debugging if hasattr(ctx, "functions") and ctx.functions: func = list(ctx.functions.values())[0] - hp.note(f"Generated function with {len(func._basic_block_dict)} basic blocks") - for bb_name, bb in func._basic_block_dict.items(): - hp.note(f"Block {bb_name}: {len(bb.instructions)} instructions") + hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") + for bb in func.get_basic_blocks(): + hp.note(f"Block {bb.label.value}: {len(bb.instructions)} instructions") checker = MemoryFuzzChecker(pass_list) - # The property we're testing: optimization should preserve semantics - assert checker.check_memory_equivalence(ctx), "Memory optimization broke semantics" + # The property we're testing: optimization passes should not crash + assert checker.check_memory_equivalence(ctx), "Memory optimization pass crashed" # Utility function for manual testing @@ -462,15 +541,10 @@ def generate_sample_ir() -> IRContext: if ctx and ctx.functions: func = list(ctx.functions.values())[0] - print(f"Generated function with {len(func._basic_block_dict)} basic blocks:") + print(f"Generated function with {func.num_basic_blocks} basic blocks:") print(func) # Test with a simple pass - try: - from vyper.venom.passes.load_elimination import LoadEliminationPass - - checker = MemoryFuzzChecker([LoadEliminationPass]) - result = checker.check_memory_equivalence(ctx) - print(f"\nEquivalence check result: {result}") - except ImportError: - print("Could not import LoadEliminationPass for testing") + checker = MemoryFuzzChecker([LoadEliminationPass]) + result = checker.check_memory_equivalence(ctx) + print(f"\nEquivalence check result: {result}") diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 30fac4875d..51c3420852 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -23,6 +23,12 @@ class DataSection: label: IRLabel data_items: list[DataItem] = field(default_factory=list) + def copy(self) -> "DataSection": + new_section = DataSection(self.label) + for item in self.data_items: + new_section.data_items.append(DataItem(item.data)) + return new_section + def __str__(self): ret = [f"dbsection {self.label.value}:"] for item in self.data_items: @@ -99,6 +105,25 @@ def append_data_item(self, data: IRLabel | bytes) -> None: data_section = self.data_segment[-1] data_section.data_items.append(DataItem(data)) + def copy(self) -> "IRContext": + new_ctx = IRContext() + new_ctx.ctor_mem_size = self.ctor_mem_size + new_ctx.immutables_len = self.immutables_len + new_ctx.last_label = self.last_label + new_ctx.last_variable = self.last_variable + + for label, fn in self.functions.items(): + new_fn = fn.copy() + new_ctx.add_function(new_fn) + + if self.entry_function is not None: + new_ctx.entry_function = new_ctx.functions[self.entry_function.name] + + for section in self.data_segment: + new_ctx.data_segment.append(section.copy()) + + return new_ctx + def as_graph(self) -> str: s = ["digraph G {"] for fn in self.functions.values(): From 49ecb06806e245d856551d6768e636b03f86e3d3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 18:15:28 +0200 Subject: [PATCH 05/24] lint --- tests/functional/venom/test_memory_fuzzer.py | 2 +- vyper/venom/context.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index c06a4ad7c1..b32bb9ed94 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -383,7 +383,7 @@ def venom_function_with_memory_ops(draw) -> IRContext: entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) # Generate content for each block - for i, bb in enumerate(basic_blocks): + for bb in basic_blocks: fuzzer.current_bb = bb # Generate block content diff --git a/vyper/venom/context.py b/vyper/venom/context.py index 51c3420852..e9be707fa7 100644 --- a/vyper/venom/context.py +++ b/vyper/venom/context.py @@ -112,7 +112,7 @@ def copy(self) -> "IRContext": new_ctx.last_label = self.last_label new_ctx.last_variable = self.last_variable - for label, fn in self.functions.items(): + for fn in self.functions.values(): new_fn = fn.copy() new_ctx.add_function(new_fn) From 2791a82d2abda55dc1f351aad28c697b15fc0511 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 19:27:36 +0200 Subject: [PATCH 06/24] polishing, refactoring --- tests/functional/venom/test_memory_fuzzer.py | 380 +++++++++---------- 1 file changed, 179 insertions(+), 201 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index b32bb9ed94..1413b1b8d3 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -6,6 +6,9 @@ can be plugged with any Venom passes. """ +from dataclasses import dataclass +from typing import Optional + import hypothesis as hp import hypothesis.strategies as st import pytest @@ -18,27 +21,56 @@ from vyper.venom.passes.load_elimination import LoadEliminationPass from vyper.venom.passes.memmerging import MemMergingPass -# Memory operations that can be fuzzed MEMORY_OPS = ["mload", "mstore", "mcopy"] -# Precompile addresses for fence operations that generate real data +# precompiles act as fence operations that generate real output data, +# preventing optimizers from eliminating memory operations PRECOMPILES = { - 0x1: "ecrecover", # Returns 32 bytes - 0x2: "sha256", # Returns 32 bytes - 0x3: "ripemd160", # Returns 32 bytes - 0x4: "identity", # Returns input data - 0x5: "modexp", # Returns variable length - 0x6: "ecadd", # Returns 64 bytes - 0x7: "ecmul", # Returns 64 bytes - 0x8: "ecpairing", # Returns 32 bytes - 0x9: "blake2f", # Returns 64 bytes + 0x1: "ecrecover", + 0x2: "sha256", + 0x3: "ripemd160", + 0x4: "identity", + 0x5: "modexp", + 0x6: "ecadd", + 0x7: "ecmul", + 0x8: "ecpairing", + 0x9: "blake2f", } -# Constants for fuzzing -MAX_MEMORY_SIZE = 4096 # Limit for memory operations +MAX_MEMORY_SIZE = 4096 MAX_BASIC_BLOCKS = 8 MAX_INSTRUCTIONS_PER_BLOCK = 8 -MAX_LOOP_ITERATIONS = 12 # Maximum iterations before forced loop exit +MAX_LOOP_ITERATIONS = 12 + + +@dataclass +class _BBType: + """Base class for basic block types in the CFG.""" + pass + + +@dataclass +class _ReturnBB(_BBType): + """Basic block that returns.""" + pass + + +@dataclass +class _JumpBB(_BBType): + """Basic block with unconditional jump.""" + target: IRBasicBlock + + +@dataclass +class _BranchBB(_BBType): + """Basic block with conditional branch.""" + target1: IRBasicBlock + target2: IRBasicBlock + counter_addr: Optional[int] = None + + @property + def is_back_edge(self) -> bool: + return self.counter_addr is not None class MemoryFuzzer: @@ -50,8 +82,8 @@ def __init__(self): self.variable_counter = 0 self.bb_counter = 0 self.calldata_offset = MAX_MEMORY_SIZE - self.available_vars = [] # Variables available for use - self.allocated_memory_slots = set() # Track memory addresses that have been used + self.available_vars = [] + self.allocated_memory_slots = set() def get_next_variable(self) -> IRVariable: """Generate a new unique variable.""" @@ -62,19 +94,16 @@ def get_next_variable(self) -> IRVariable: def ensure_all_vars_have_values(self) -> None: """Ensure all available variables have values by using calldataload for unassigned ones.""" - # Find all variables that are outputs of instructions assigned_vars = set() for bb in self.function.get_basic_blocks(): for inst in bb.instructions: if inst.output: assigned_vars.add(inst.output) - # For variables that don't have values, add calldataload at the beginning entry_bb = self.function.entry unassigned_vars = [var for var in self.available_vars if var not in assigned_vars] for i, var in enumerate(unassigned_vars): - # Insert calldataload at the beginning of the entry block inst = IRInstruction("calldataload", [IRLiteral(self.calldata_offset)], var) entry_bb.insert_instruction(inst, index=i) self.calldata_offset += 32 @@ -86,29 +115,41 @@ def get_next_bb_label(self) -> IRLabel: def get_memory_address(self, draw) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" - # 50% chance to use existing variable if self.available_vars and draw(st.booleans()): return draw(st.sampled_from(self.available_vars)) - # Generate literal address if self.allocated_memory_slots and draw(st.booleans()): - # Bias towards addresses near existing allocations + # bias towards addresses near existing allocations to create aliasing opportunities base_addr = draw(st.sampled_from(list(self.allocated_memory_slots))) - # Random offset biased towards edges (0 and 32 are most common) offset = draw(st.integers(min_value=-32, max_value=32)) - if draw(st.booleans()): # 50% chance to snap to edge + if draw(st.booleans()): + # snap to word boundaries for more interesting aliasing patterns offset = 0 if abs(offset) < 16 else (32 if offset > 0 else -32) addr = max(0, min(MAX_MEMORY_SIZE - 32, base_addr + offset)) else: - # Random address anywhere in memory addr = draw(st.integers(min_value=0, max_value=MAX_MEMORY_SIZE - 32)) self.allocated_memory_slots.add(addr) return IRLiteral(addr) +@st.composite +def copy_length(draw) -> int: + """Generate a length suitable for a copy operation.""" + if draw(st.booleans()): + # small lengths are more interesting for optimizer edge cases + if draw(st.booleans()): + return draw( + st.sampled_from([1, 2, 4, 8, 16, 20, 24, 28, 31, 32, 33, 36, 40, 48, 64, 96]) + ) + else: + return draw(st.integers(min_value=1, max_value=96)) + else: + return draw(st.integers(min_value=97, max_value=1024)) + + @st.composite def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: """Generate and append a memory instruction to current basic block.""" @@ -116,14 +157,11 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: bb = fuzzer.current_bb if op == "mload": - # %result = mload %addr addr = fuzzer.get_memory_address(draw) result_var = bb.append_instruction("mload", addr) fuzzer.available_vars.append(result_var) elif op == "mstore": - # mstore %value, %addr - # Random choice between variable and literal for value if fuzzer.available_vars and draw(st.booleans()): value = draw(st.sampled_from(fuzzer.available_vars)) else: @@ -132,25 +170,14 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: bb.append_instruction("mstore", value, addr) elif op == "mcopy": - # mcopy %dest, %src, %length dest = fuzzer.get_memory_address(draw) src = fuzzer.get_memory_address(draw) - - # Bias towards small lengths (more interesting for optimizers) - if draw(st.booleans()): - # Small lengths (1-96 bytes, biased towards 32-byte multiples) - if draw(st.booleans()): - length = draw( - st.sampled_from([1, 2, 4, 8, 16, 20, 24, 28, 31, 32, 33, 36, 40, 48, 64, 96]) - ) - else: - length = draw(st.integers(min_value=1, max_value=96)) - else: - # Larger lengths (up to 1KB) - length = draw(st.integers(min_value=97, max_value=1024)) - + length = draw(copy_length()) bb.append_instruction("mcopy", dest, src, IRLiteral(length)) + else: + raise ValueError("unreachable") + @st.composite def control_flow_graph(draw, basic_blocks): @@ -161,44 +188,35 @@ def control_flow_graph(draw, basic_blocks): 3. Proper use of jump and branch instructions """ if len(basic_blocks) == 1: - # Single block case - must return - return {basic_blocks[0]: {"type": "return"}} + return {basic_blocks[0]: _ReturnBB()} - cfg = {} + cfg: dict[IRBasicBlock, _BBType] = {} entry_block = basic_blocks[0] - # Create a spanning tree to ensure all blocks are reachable + # create a spanning tree to ensure all blocks are reachable remaining_blocks = basic_blocks[1:] reachable_blocks = [entry_block] - # Build spanning tree connections while remaining_blocks: - # Pick a random reachable block to connect from source = draw(st.sampled_from(reachable_blocks)) - # Pick a random unreachable block to connect to target = draw(st.sampled_from(remaining_blocks)) - # Add the target to reachable blocks reachable_blocks.append(target) remaining_blocks.remove(target) - # Decide if this connection should be a jump or branch if draw(st.booleans()): - # Jump connection - cfg[source] = {"type": "jump", "target": target} + cfg[source] = _JumpBB(target=target) else: - # Branch connection - need two targets other_target = draw(st.sampled_from(basic_blocks)) - cfg[source] = {"type": "branch", "target1": target, "target2": other_target} + cfg[source] = _BranchBB(target1=target, target2=other_target) - # Now add additional edges for more complex control flow + # add additional edges for more complex control flow num_additional_edges = draw(st.integers(min_value=0, max_value=len(basic_blocks))) - loop_counter_addr = MAX_MEMORY_SIZE # Start of reserved memory for metadata + loop_counter_addr = MAX_MEMORY_SIZE for _ in range(num_additional_edges): source = draw(st.sampled_from(basic_blocks)) - # Skip if already has terminator if source in cfg: continue @@ -207,68 +225,59 @@ def control_flow_graph(draw, basic_blocks): if edge_type == "jump": target = draw(st.sampled_from(basic_blocks)) - # Check if this creates a back edge (potential loop) is_back_edge = basic_blocks.index(target) <= basic_blocks.index(source) if is_back_edge: - # For back edges, use a branch with loop counter instead of unconditional jump - cfg[source] = { - "type": "branch", - "target1": target, - "target2": draw(st.sampled_from(basic_blocks)), - "is_back_edge": True, - "counter_addr": loop_counter_addr, - } - loop_counter_addr += 32 # Next loop uses different memory location + # back edges need loop counters to prevent infinite loops + cfg[source] = _BranchBB( + target1=target, + target2=draw(st.sampled_from(basic_blocks)), + counter_addr=loop_counter_addr, + ) + loop_counter_addr += 32 else: - cfg[source] = {"type": "jump", "target": target} + cfg[source] = _JumpBB(target=target) else: # branch target1 = draw(st.sampled_from(basic_blocks)) target2 = draw(st.sampled_from(basic_blocks)) - # Check if either target creates a back edge is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(source) is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(source) - cfg[source] = { - "type": "branch", - "target1": target1, - "target2": target2, - "is_back_edge": is_back_edge1 or is_back_edge2, - "counter_addr": loop_counter_addr if (is_back_edge1 or is_back_edge2) else None, - } + cfg[source] = _BranchBB( + target1=target1, + target2=target2, + counter_addr=loop_counter_addr if (is_back_edge1 or is_back_edge2) else None, + ) if is_back_edge1 or is_back_edge2: loop_counter_addr += 32 - # Ensure at least one block can return (avoid infinite execution) - blocks_without_terminators = [bb for bb in basic_blocks if bb not in cfg] - if blocks_without_terminators: - # Make some blocks return - num_returns = max(1, len(blocks_without_terminators) // 3) + # ensure at least one block can return + remaining_blocks = [bb for bb in basic_blocks if bb not in cfg] + if remaining_blocks: return_blocks = draw( st.lists( - st.sampled_from(blocks_without_terminators), - min_size=num_returns, - max_size=num_returns, + st.sampled_from(remaining_blocks), + min_size=1, + max_size=len(remaining_blocks), unique=True, ) ) for bb in return_blocks: - cfg[bb] = {"type": "return"} - - # Add random terminators to remaining blocks - remaining = [bb for bb in blocks_without_terminators if bb not in return_blocks] - for bb in remaining: - terminator_type = draw(st.sampled_from(["jump", "branch"])) - if terminator_type == "jump": - target = draw(st.sampled_from(basic_blocks)) - cfg[bb] = {"type": "jump", "target": target} - else: - target1 = draw(st.sampled_from(basic_blocks)) - target2 = draw(st.sampled_from(basic_blocks)) - cfg[bb] = {"type": "branch", "target1": target1, "target2": target2} + cfg[bb] = _ReturnBB() + + remaining = [bb for bb in basic_blocks if bb not in cfg] + for bb in remaining: + terminator_type = draw(st.sampled_from(["jump", "branch"])) + if terminator_type == "jump": + target = draw(st.sampled_from(basic_blocks)) + cfg[bb] = _JumpBB(target=target) + else: + target1 = draw(st.sampled_from(basic_blocks)) + target2 = draw(st.sampled_from(basic_blocks)) + cfg[bb] = _BranchBB(target1=target1, target2=target2) return cfg @@ -278,74 +287,78 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: """Generate a call to a precompile that produces real output data.""" bb = fuzzer.current_bb - # Choose a precompile precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys()))) precompile_name = PRECOMPILES[precompile_addr] - # Set up input data in memory - input_offset = fuzzer.get_memory_address(draw) - output_offset = fuzzer.get_memory_address(draw) + input_ofst = fuzzer.get_memory_address(draw) + output_ofst = fuzzer.get_memory_address(draw) - if precompile_name == "identity": - # Identity precompile - copies input to output - input_size = IRLiteral(32) + if precompile_name == "ecrecover": + input_size = IRLiteral(128) # v, r, s, hash output_size = IRLiteral(32) elif precompile_name == "sha256": - # SHA256 - takes any input, outputs 32 bytes - input_size = IRLiteral(64) # Use 64 bytes input + input_size = IRLiteral(64) output_size = IRLiteral(32) - elif precompile_name == "blake2f": - # Blake2f - outputs 64 bytes - input_size = IRLiteral(213) # Blake2f requires 213 bytes input + elif precompile_name == "ripemd160": + input_size = IRLiteral(64) + output_size = IRLiteral(32) + elif precompile_name == "identity": + # identity copies min(input_size, output_size) bytes + input_size = IRLiteral(draw(copy_length())) + output_size = IRLiteral(draw(copy_length())) + elif precompile_name == "modexp": + input_size = IRLiteral(96) # minimal: base_len, exp_len, mod_len + output_size = IRLiteral(32) + elif precompile_name == "ecadd": + input_size = IRLiteral(128) # two EC points (x1, y1, x2, y2) output_size = IRLiteral(64) - elif precompile_name in ["ecadd", "ecmul"]: - # EC operations - specific input/output sizes - input_size = IRLiteral(96) # EC point operations + elif precompile_name == "ecmul": + input_size = IRLiteral(96) # EC point (x, y) and scalar output_size = IRLiteral(64) - else: - # Default case - input_size = IRLiteral(32) + elif precompile_name == "ecpairing": + input_size = IRLiteral(192) # minimal: one pair of G1 and G2 points output_size = IRLiteral(32) + elif precompile_name == "blake2f": + input_size = IRLiteral(213) # blake2f requires specific input size + output_size = IRLiteral(64) + else: + # unreachable + raise Exception(f"Unknown precompile: {precompile_name}") - # Call the precompile - gas = bb.append_instruction("gas") # Use all available gas + gas = bb.append_instruction("gas") addr = IRLiteral(precompile_addr) bb.append_instruction( - "staticcall", gas, addr, input_offset, input_size, output_offset, output_size + "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size ) @st.composite def basic_block_instructions(draw, fuzzer: MemoryFuzzer) -> None: """Generate instructions for a basic block.""" - - # Generate main instructions num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK)) for _ in range(num_instructions): - # Choose instruction type inst_type = draw(st.sampled_from(["memory", "precompile"])) if inst_type == "memory": draw(memory_instruction(fuzzer)) elif inst_type == "precompile": draw(precompile_call(fuzzer)) + else: + raise Exception("unreachable") @st.composite def venom_function_with_memory_ops(draw) -> IRContext: """Generate a complete Venom IR function using IRBasicBlock API.""" - fuzzer = MemoryFuzzer() - # Create function func_name = IRLabel("_fuzz_function", is_symbol=True) fuzzer.function = IRFunction(func_name, fuzzer.ctx) fuzzer.ctx.functions[func_name] = fuzzer.function fuzzer.ctx.entry_function = fuzzer.function - # Generate blocks num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS)) basic_blocks = [] @@ -359,79 +372,66 @@ def venom_function_with_memory_ops(draw) -> IRContext: fuzzer.function.append_basic_block(bb) basic_blocks.append(bb) - # Set entry block fuzzer.function.entry = basic_blocks[0] - # Create a control flow graph that ensures reachability and loop termination cfg = draw(control_flow_graph(basic_blocks)) - # Initialize memory and loop counters at function entry entry_block = basic_blocks[0] entry_block.append_instruction( "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE) ) - # Extract used counter addresses from CFG and initialize them + # extract loop counter addresses and initialize them used_counter_addrs = set() - for terminator_info in cfg.values(): - if terminator_info.get("counter_addr") is not None: - addr = terminator_info["counter_addr"] + for bb_type in cfg.values(): + if isinstance(bb_type, _BranchBB) and bb_type.counter_addr is not None: + addr = bb_type.counter_addr assert addr not in used_counter_addrs, f"Duplicate counter address {addr}" used_counter_addrs.add(addr) for addr in used_counter_addrs: entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) - # Generate content for each block for bb in basic_blocks: fuzzer.current_bb = bb - # Generate block content draw(basic_block_instructions(fuzzer)) - # Add terminators based on the control flow graph - terminator_info = cfg[bb] - if terminator_info["type"] == "return": + bb_type = cfg[bb] + + if isinstance(bb_type, _ReturnBB): bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0)) - elif terminator_info["type"] == "jump": - target = terminator_info["target"] - bb.append_instruction("jmp", target.label) - elif terminator_info["type"] == "branch": - # Use existing variable or create condition + + elif isinstance(bb_type, _JumpBB): + bb.append_instruction("jmp", bb_type.target.label) + + elif isinstance(bb_type, _BranchBB): if fuzzer.available_vars: cond_var = draw(st.sampled_from(fuzzer.available_vars)) else: cond_var = bb.append_instruction("mload", IRLiteral(0)) - # Add loop counter check if this is a back edge - if terminator_info.get("is_back_edge", False): - loop_counter_addr = terminator_info["counter_addr"] + cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) + + if bb_type.is_back_edge: + loop_counter_addr = bb_type.counter_addr - # Load and increment counter counter = bb.append_instruction("mload", IRLiteral(loop_counter_addr)) - incremented = bb.append_instruction("add", counter, IRLiteral(1)) - bb.append_instruction("mstore", incremented, IRLiteral(loop_counter_addr)) + counter = bb.append_instruction("add", counter, IRLiteral(1)) + bb.append_instruction("mstore", counter, IRLiteral(loop_counter_addr)) - # Check if we should continue looping (counter < MAX_LOOP_ITERATIONS) + # continue loop only if: counter < MAX_LOOP_ITERATIONS AND original condition counter_lt_max = bb.append_instruction( "lt", incremented, IRLiteral(MAX_LOOP_ITERATIONS) ) - # Normalize original condition to 0 or 1 - cond_normalized = bb.append_instruction("and", cond_var, IRLiteral(1)) + cond_var = bb.append_instruction("and", counter_lt_max, cond_var) - # Continue loop only if: counter < MAX AND original condition is true - combined_cond = bb.append_instruction("and", counter_lt_max, cond_normalized) - cond_var = combined_cond - else: - # Non-loop branches: just normalize condition to 0 or 1 - cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) - - target1 = terminator_info["target1"] - target2 = terminator_info["target2"] - bb.append_instruction("jnz", target1.label, target2.label, cond_var) + bb.append_instruction("jnz", bb_type.target1.label, bb_type.target2.label, cond_var) + + else: + raise Exception() # unreachable - # Ensure all variables have values before returning fuzzer.ensure_all_vars_have_values() return fuzzer.ctx @@ -444,36 +444,23 @@ def __init__(self, passes: list[type], post_passes: list[type] = None): self.passes = passes self.post_passes = post_passes or [] - def check_memory_equivalence(self, ctx: IRContext) -> bool: + def run_passes(self, ctx: IRContext) -> None: """ - Check that memory passes preserve semantics. + Run optimization passes on the IR context. - For now, this just verifies that the passes run without errors. - TODO: Implement actual semantic equivalence checking. + This method lets exceptions bubble up so Hypothesis can handle them properly. """ - try: - # Copy the context for optimization - optimized_ctx = ctx.copy() - - # Apply passes to optimized version - for fn in optimized_ctx.functions.values(): - ac = IRAnalysesCache(fn) - for pass_class in self.passes: - pass_obj = pass_class(ac, fn) - pass_obj.run_pass() + optimized_ctx = ctx.copy() - # Apply post passes - for pass_class in self.post_passes: - pass_obj = pass_class(ac, fn) - pass_obj.run_pass() + for fn in optimized_ctx.functions.values(): + ac = IRAnalysesCache(fn) + for pass_class in self.passes: + pass_obj = pass_class(ac, fn) + pass_obj.run_pass() - # If we get here, the passes ran successfully - return True - - except Exception as e: - # If optimization fails, the pass has a bug - hp.note(f"Optimization failed: {e}") - return False + for pass_class in self.post_passes: + pass_obj = pass_class(ac, fn) + pass_obj.run_pass() # Test with memory-related passes @@ -505,12 +492,10 @@ def test_memory_passes_fuzzing(pass_list, ctx): """ Property-based test for memory optimization passes. - Tests that memory passes preserve semantics by comparing execution - between optimized and unoptimized versions. + Tests that memory passes do not crash on complex IR. """ hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") - # Log the generated IR for debugging if hasattr(ctx, "functions") and ctx.functions: func = list(ctx.functions.values())[0] hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") @@ -518,25 +503,19 @@ def test_memory_passes_fuzzing(pass_list, ctx): hp.note(f"Block {bb.label.value}: {len(bb.instructions)} instructions") checker = MemoryFuzzChecker(pass_list) - - # The property we're testing: optimization passes should not crash - assert checker.check_memory_equivalence(ctx), "Memory optimization pass crashed" + checker.run_passes(ctx) -# Utility function for manual testing def generate_sample_ir() -> IRContext: """Generate a sample IR for manual inspection.""" import random random.seed(42) - - # Create a hypothesis example ctx = venom_function_with_memory_ops().example() return ctx if __name__ == "__main__": - # Example usage ctx = generate_sample_ir() if ctx and ctx.functions: @@ -544,7 +523,6 @@ def generate_sample_ir() -> IRContext: print(f"Generated function with {func.num_basic_blocks} basic blocks:") print(func) - # Test with a simple pass checker = MemoryFuzzChecker([LoadEliminationPass]) - result = checker.check_memory_equivalence(ctx) - print(f"\nEquivalence check result: {result}") + checker.run_passes(ctx) + print("\nPasses completed successfully") From 193c5d49e587e108d6f8b527451ae08f9c9c47ae Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 20:22:38 +0200 Subject: [PATCH 07/24] make things compile --- tests/functional/venom/test_memory_fuzzer.py | 210 ++++++++----------- 1 file changed, 90 insertions(+), 120 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 1413b1b8d3..60c6c03c31 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -17,9 +17,7 @@ from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction -from vyper.venom.passes.dead_store_elimination import DeadStoreEliminationPass -from vyper.venom.passes.load_elimination import LoadEliminationPass -from vyper.venom.passes.memmerging import MemMergingPass +from vyper.venom.passes import DeadStoreElimination,LoadElimination, MemMergePass MEMORY_OPS = ["mload", "mstore", "mcopy"] @@ -46,30 +44,34 @@ @dataclass class _BBType: """Base class for basic block types in the CFG.""" + pass @dataclass class _ReturnBB(_BBType): """Basic block that returns.""" + pass @dataclass class _JumpBB(_BBType): """Basic block with unconditional jump.""" + target: IRBasicBlock @dataclass class _BranchBB(_BBType): """Basic block with conditional branch.""" + target1: IRBasicBlock target2: IRBasicBlock counter_addr: Optional[int] = None - + @property - def is_back_edge(self) -> bool: + def has_back_edge(self) -> bool: return self.counter_addr is not None @@ -113,6 +115,13 @@ def get_next_bb_label(self) -> IRLabel: self.bb_counter += 1 return IRLabel(f"bb{self.bb_counter}") + def get_random_variable(self, draw) -> IRVariable: + """Get a random available variable or create a new one.""" + if self.available_vars and draw(st.booleans()): + return draw(st.sampled_from(self.available_vars)) + else: + return self.get_next_variable() + def get_memory_address(self, draw) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" if self.available_vars and draw(st.booleans()): @@ -151,10 +160,9 @@ def copy_length(draw) -> int: @st.composite -def memory_instruction(draw, fuzzer: MemoryFuzzer) -> None: +def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: """Generate and append a memory instruction to current basic block.""" op = draw(st.sampled_from(MEMORY_OPS)) - bb = fuzzer.current_bb if op == "mload": addr = fuzzer.get_memory_address(draw) @@ -187,20 +195,25 @@ def control_flow_graph(draw, basic_blocks): 2. No infinite loops (all loops terminate within 12 iterations) 3. Proper use of jump and branch instructions """ - if len(basic_blocks) == 1: - return {basic_blocks[0]: _ReturnBB()} - cfg: dict[IRBasicBlock, _BBType] = {} - entry_block = basic_blocks[0] + + # last block is always a return block - guarantees all other blocks have forward targets + cfg[basic_blocks[-1]] = _ReturnBB() + + # cache forward targets for each block for performance + forward_targets = {} + for i, bb in enumerate(basic_blocks): + forward_targets[bb] = basic_blocks[i + 1 :] # create a spanning tree to ensure all blocks are reachable - remaining_blocks = basic_blocks[1:] - reachable_blocks = [entry_block] + remaining_blocks = basic_blocks[1:] # exclude entry block + reachable_blocks = [basic_blocks[0]] while remaining_blocks: source = draw(st.sampled_from(reachable_blocks)) target = draw(st.sampled_from(remaining_blocks)) + # target is now reachable, but it may not be in cfg yet reachable_blocks.append(target) remaining_blocks.remove(target) @@ -210,82 +223,45 @@ def control_flow_graph(draw, basic_blocks): other_target = draw(st.sampled_from(basic_blocks)) cfg[source] = _BranchBB(target1=target, target2=other_target) - # add additional edges for more complex control flow - num_additional_edges = draw(st.integers(min_value=0, max_value=len(basic_blocks))) + # classify remaining blocks that were not handled during spanning + # tree construction. loop_counter_addr = MAX_MEMORY_SIZE - for _ in range(num_additional_edges): - source = draw(st.sampled_from(basic_blocks)) - - if source in cfg: + for bb in basic_blocks: + if bb in cfg: continue edge_type = draw(st.sampled_from(["jump", "branch"])) if edge_type == "jump": - target = draw(st.sampled_from(basic_blocks)) - - is_back_edge = basic_blocks.index(target) <= basic_blocks.index(source) - - if is_back_edge: - # back edges need loop counters to prevent infinite loops - cfg[source] = _BranchBB( - target1=target, - target2=draw(st.sampled_from(basic_blocks)), - counter_addr=loop_counter_addr, - ) - loop_counter_addr += 32 - else: - cfg[source] = _JumpBB(target=target) - + target = draw(st.sampled_from(forward_targets[bb])) + cfg[bb] = _JumpBB(target=target) else: # branch target1 = draw(st.sampled_from(basic_blocks)) target2 = draw(st.sampled_from(basic_blocks)) - is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(source) - is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(source) + is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(bb) + is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(bb) - cfg[source] = _BranchBB( - target1=target1, - target2=target2, - counter_addr=loop_counter_addr if (is_back_edge1 or is_back_edge2) else None, - ) + if is_back_edge1 and is_back_edge2: + # ensure at least one target provides forward progress + target2 = draw(st.sampled_from(forward_targets[bb])) + is_back_edge2 = False - if is_back_edge1 or is_back_edge2: - loop_counter_addr += 32 + contains_back_edge = is_back_edge1 or is_back_edge2 + counter_addr = loop_counter_addr if contains_back_edge else None - # ensure at least one block can return - remaining_blocks = [bb for bb in basic_blocks if bb not in cfg] - if remaining_blocks: - return_blocks = draw( - st.lists( - st.sampled_from(remaining_blocks), - min_size=1, - max_size=len(remaining_blocks), - unique=True, - ) - ) - for bb in return_blocks: - cfg[bb] = _ReturnBB() - - remaining = [bb for bb in basic_blocks if bb not in cfg] - for bb in remaining: - terminator_type = draw(st.sampled_from(["jump", "branch"])) - if terminator_type == "jump": - target = draw(st.sampled_from(basic_blocks)) - cfg[bb] = _JumpBB(target=target) - else: - target1 = draw(st.sampled_from(basic_blocks)) - target2 = draw(st.sampled_from(basic_blocks)) - cfg[bb] = _BranchBB(target1=target1, target2=target2) + cfg[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr) + + if contains_back_edge: + loop_counter_addr += 32 return cfg @st.composite -def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: +def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: """Generate a call to a precompile that produces real output data.""" - bb = fuzzer.current_bb precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys()))) precompile_name = PRECOMPILES[precompile_addr] @@ -328,23 +304,21 @@ def precompile_call(draw, fuzzer: MemoryFuzzer) -> None: gas = bb.append_instruction("gas") addr = IRLiteral(precompile_addr) - bb.append_instruction( - "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size - ) + bb.append_instruction("staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size) @st.composite -def basic_block_instructions(draw, fuzzer: MemoryFuzzer) -> None: +def basic_block_instructions(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: """Generate instructions for a basic block.""" num_instructions = draw(st.integers(min_value=1, max_value=MAX_INSTRUCTIONS_PER_BLOCK)) for _ in range(num_instructions): - inst_type = draw(st.sampled_from(["memory", "precompile"])) + inst_type = draw(st.sampled_from(["memory"] * 9 + ["precompile"])) if inst_type == "memory": - draw(memory_instruction(fuzzer)) + draw(memory_instruction(fuzzer, bb)) elif inst_type == "precompile": - draw(precompile_call(fuzzer)) + draw(precompile_call(fuzzer, bb)) else: raise Exception("unreachable") @@ -362,6 +336,9 @@ def venom_function_with_memory_ops(draw) -> IRContext: num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS)) basic_blocks = [] + # clear default entry block + fuzzer.function.clear_basic_blocks() + for i in range(num_blocks): if i == 0: label = IRLabel("entry") @@ -372,7 +349,7 @@ def venom_function_with_memory_ops(draw) -> IRContext: fuzzer.function.append_basic_block(bb) basic_blocks.append(bb) - fuzzer.function.entry = basic_blocks[0] + assert fuzzer.function.entry is basic_blocks[0] cfg = draw(control_flow_graph(basic_blocks)) @@ -382,55 +359,52 @@ def venom_function_with_memory_ops(draw) -> IRContext: ) # extract loop counter addresses and initialize them - used_counter_addrs = set() + counter_addrs = set() for bb_type in cfg.values(): if isinstance(bb_type, _BranchBB) and bb_type.counter_addr is not None: addr = bb_type.counter_addr - assert addr not in used_counter_addrs, f"Duplicate counter address {addr}" - used_counter_addrs.add(addr) + assert addr not in counter_addrs, f"Duplicate counter address {addr}" + counter_addrs.add(addr) - for addr in used_counter_addrs: + for addr in counter_addrs: entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) for bb in basic_blocks: - fuzzer.current_bb = bb - - draw(basic_block_instructions(fuzzer)) + draw(basic_block_instructions(fuzzer, bb)) bb_type = cfg[bb] - + if isinstance(bb_type, _ReturnBB): bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0)) - + elif isinstance(bb_type, _JumpBB): bb.append_instruction("jmp", bb_type.target.label) - - elif isinstance(bb_type, _BranchBB): - if fuzzer.available_vars: - cond_var = draw(st.sampled_from(fuzzer.available_vars)) - else: - cond_var = bb.append_instruction("mload", IRLiteral(0)) + elif isinstance(bb_type, _BranchBB): + cond_var = fuzzer.get_random_variable(draw) + # get bottom bit, for bias reasons cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) - if bb_type.is_back_edge: - loop_counter_addr = bb_type.counter_addr + if bb_type.has_back_edge: + loop_counter_addr = IRLiteral(bb_type.counter_addr) - counter = bb.append_instruction("mload", IRLiteral(loop_counter_addr)) - counter = bb.append_instruction("add", counter, IRLiteral(1)) - bb.append_instruction("mstore", counter, IRLiteral(loop_counter_addr)) + counter = bb.append_instruction("mload", loop_counter_addr) + incr_counter = bb.append_instruction("add", counter, IRLiteral(1)) + bb.append_instruction("mstore", incr_counter, loop_counter_addr) - # continue loop only if: counter < MAX_LOOP_ITERATIONS AND original condition - counter_lt_max = bb.append_instruction( - "lt", incremented, IRLiteral(MAX_LOOP_ITERATIONS) - ) + # exit loop when counter >= MAX_LOOP_ITERATIONS + # (note we are guaranteed that second target provides forward + # progress) + max_iterations = IRLiteral(MAX_LOOP_ITERATIONS) + # counter < iterbound + counter_ok = bb.append_instruction("lt", counter, max_iterations) - cond_var = bb.append_instruction("and", counter_lt_max, cond_var) + cond_var = bb.append_instruction("and", counter_ok, cond_var) bb.append_instruction("jnz", bb_type.target1.label, bb_type.target2.label, cond_var) - + else: - raise Exception() # unreachable + raise Exception() # unreachable fuzzer.ensure_all_vars_have_values() @@ -469,13 +443,13 @@ def run_passes(self, ctx: IRContext) -> None: "pass_list", [ # Test individual memory passes - [LoadEliminationPass], - [DeadStoreEliminationPass], - [MemMergingPass], + [LoadElimination], + [DeadStoreElimination], + [MemMergePass], # Test combinations - [LoadEliminationPass, DeadStoreEliminationPass], - [DeadStoreEliminationPass, LoadEliminationPass], - [LoadEliminationPass, MemMergingPass], + [LoadElimination, DeadStoreElimination], + [DeadStoreElimination, LoadElimination], + [LoadElimination, MemMergePass], ], ) @hp.given(ctx=venom_function_with_memory_ops()) @@ -508,9 +482,6 @@ def test_memory_passes_fuzzing(pass_list, ctx): def generate_sample_ir() -> IRContext: """Generate a sample IR for manual inspection.""" - import random - - random.seed(42) ctx = venom_function_with_memory_ops().example() return ctx @@ -518,11 +489,10 @@ def generate_sample_ir() -> IRContext: if __name__ == "__main__": ctx = generate_sample_ir() - if ctx and ctx.functions: - func = list(ctx.functions.values())[0] - print(f"Generated function with {func.num_basic_blocks} basic blocks:") - print(func) + func = list(ctx.functions.values())[0] + print(f"Generated function with {func.num_basic_blocks} basic blocks:") + print(func) - checker = MemoryFuzzChecker([LoadEliminationPass]) - checker.run_passes(ctx) - print("\nPasses completed successfully") + checker = MemoryFuzzChecker([MemMergePass]) + checker.run_passes(ctx) + print(ctx) From d42ed649d985583280864103a4b17836ffde3a9f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 20:43:22 +0200 Subject: [PATCH 08/24] add evm harness --- tests/functional/venom/test_memory_fuzzer.py | 96 +++++++++++++++++--- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 60c6c03c31..e0bf070c27 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -13,11 +13,14 @@ import hypothesis.strategies as st import pytest +from tests.evm_backends.base_env import ExecutionReverted +from vyper.ir.compile_ir import assembly_to_evm +from vyper.venom import SingleUseExpansion, VenomCompiler from vyper.venom.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction -from vyper.venom.passes import DeadStoreElimination,LoadElimination, MemMergePass +from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass MEMORY_OPS = ["mload", "mstore", "mcopy"] @@ -36,8 +39,8 @@ } MAX_MEMORY_SIZE = 4096 -MAX_BASIC_BLOCKS = 8 -MAX_INSTRUCTIONS_PER_BLOCK = 8 +MAX_BASIC_BLOCKS = 50 +MAX_INSTRUCTIONS_PER_BLOCK = 50 MAX_LOOP_ITERATIONS = 12 @@ -324,8 +327,14 @@ def basic_block_instructions(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> No @st.composite -def venom_function_with_memory_ops(draw) -> IRContext: - """Generate a complete Venom IR function using IRBasicBlock API.""" +def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: + """Generate a complete Venom IR function using IRBasicBlock API. + + Returns: + tuple[IRContext, int]: The generated IR context and the required calldata size. + The calldata size includes both the initial memory seed (MAX_MEMORY_SIZE bytes) + and any additional calldata needed for unassigned variables. + """ fuzzer = MemoryFuzzer() func_name = IRLabel("_fuzz_function", is_symbol=True) @@ -408,7 +417,7 @@ def venom_function_with_memory_ops(draw) -> IRContext: fuzzer.ensure_all_vars_have_values() - return fuzzer.ctx + return fuzzer.ctx, fuzzer.calldata_offset class MemoryFuzzChecker: @@ -418,16 +427,33 @@ def __init__(self, passes: list[type], post_passes: list[type] = None): self.passes = passes self.post_passes = post_passes or [] - def run_passes(self, ctx: IRContext) -> None: + def compile_to_bytecode(self, ctx: IRContext) -> bytes: + """Compile Venom IR context to EVM bytecode.""" + # Need SingleUseExpansion for venom_to_assembly + for fn in ctx.functions.values(): + ac = IRAnalysesCache(fn) + SingleUseExpansion(ac, fn).run_pass() + + # Compile to assembly and then to bytecode + compiler = VenomCompiler([ctx]) + asm = compiler.generate_evm(no_optimize=False) + bytecode, _ = assembly_to_evm(asm) + return bytecode + + def run_passes(self, ctx: IRContext) -> IRContext: """ Run optimization passes on the IR context. - This method lets exceptions bubble up so Hypothesis can handle them properly. + Returns the optimized context. """ optimized_ctx = ctx.copy() for fn in optimized_ctx.functions.values(): ac = IRAnalysesCache(fn) + + # Convert to SSA form first if needed by the passes + MakeSSA(ac, fn).run_pass() + for pass_class in self.passes: pass_obj = pass_class(ac, fn) pass_obj.run_pass() @@ -436,6 +462,47 @@ def run_passes(self, ctx: IRContext) -> None: pass_obj = pass_class(ac, fn) pass_obj.run_pass() + return optimized_ctx + + def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, bytes]: + """Execute bytecode with given calldata and return success status and output.""" + deployed_address = env._deploy(bytecode, value=0) + + try: + result = env.message_call(to=deployed_address, data=calldata, value=0) + return True, result + except ExecutionReverted as e: + # return revert data if available + return False, e.args[0] if e.args else b"" + except Exception: + # other errors like out of gas + return False, b"" + + def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: + """Check equivalence between unoptimized and optimized execution.""" + unopt_bytecode = self.compile_to_bytecode(ctx) + + opt_ctx = self.run_passes(ctx) + opt_bytecode = self.compile_to_bytecode(opt_ctx) + + unopt_success, unopt_output = self.execute_bytecode(unopt_bytecode, calldata, env) + opt_success, opt_output = self.execute_bytecode(opt_bytecode, calldata, env) + + assert ( + unopt_success == opt_success + ), f"Execution success mismatch: unopt={unopt_success}, opt={opt_success}" + assert ( + unopt_output == opt_output + ), f"Output mismatch: unopt={unopt_output.hex()}, opt={opt_output.hex()}" + + +@st.composite +def venom_with_calldata(draw): + """Generate Venom IR context with matching calldata.""" + ctx, calldata_size = draw(venom_function_with_memory_ops()) + calldata = draw(st.binary(min_size=calldata_size, max_size=calldata_size)) + return ctx, calldata + # Test with memory-related passes @pytest.mark.fuzzing @@ -452,7 +519,7 @@ def run_passes(self, ctx: IRContext) -> None: [LoadElimination, MemMergePass], ], ) -@hp.given(ctx=venom_function_with_memory_ops()) +@hp.given(venom_with_calldata()) @hp.settings( max_examples=100, suppress_health_check=( @@ -462,22 +529,23 @@ def run_passes(self, ctx: IRContext) -> None: ), deadline=None, ) -def test_memory_passes_fuzzing(pass_list, ctx): +def test_memory_passes_fuzzing(pass_list, venom_data, env): """ Property-based test for memory optimization passes. - Tests that memory passes do not crash on complex IR. + Tests that memory passes preserve semantics by comparing EVM execution results. """ + ctx, calldata = venom_data + hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") if hasattr(ctx, "functions") and ctx.functions: func = list(ctx.functions.values())[0] hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") - for bb in func.get_basic_blocks(): - hp.note(f"Block {bb.label.value}: {len(bb.instructions)} instructions") + hp.note(f"Calldata size: {len(calldata)} bytes") checker = MemoryFuzzChecker(pass_list) - checker.run_passes(ctx) + checker.check_equivalence(ctx, calldata, env) def generate_sample_ir() -> IRContext: From 50feb1cdcde263b8aa0ac80cdd900e1668db028c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 20:52:54 +0200 Subject: [PATCH 09/24] cleanup, add proper deploy code --- tests/functional/venom/test_memory_fuzzer.py | 52 ++++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index e0bf070c27..3d5448082d 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -429,7 +429,7 @@ def __init__(self, passes: list[type], post_passes: list[type] = None): def compile_to_bytecode(self, ctx: IRContext) -> bytes: """Compile Venom IR context to EVM bytecode.""" - # Need SingleUseExpansion for venom_to_assembly + # assumes MakeSSA has already been run for fn in ctx.functions.values(): ac = IRAnalysesCache(fn) SingleUseExpansion(ac, fn).run_pass() @@ -442,7 +442,7 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes: def run_passes(self, ctx: IRContext) -> IRContext: """ - Run optimization passes on the IR context. + Copies the IRContext and runs optimization passes on the copy of the IR context. Returns the optimized context. """ @@ -451,9 +451,6 @@ def run_passes(self, ctx: IRContext) -> IRContext: for fn in optimized_ctx.functions.values(): ac = IRAnalysesCache(fn) - # Convert to SSA form first if needed by the passes - MakeSSA(ac, fn).run_pass() - for pass_class in self.passes: pass_obj = pass_class(ac, fn) pass_obj.run_pass() @@ -466,34 +463,38 @@ def run_passes(self, ctx: IRContext) -> IRContext: def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, bytes]: """Execute bytecode with given calldata and return success status and output.""" - deployed_address = env._deploy(bytecode, value=0) + # wrap runtime bytecode in deploy bytecode that returns it + bytecode_len = len(bytecode) + bytecode_len_hex = hex(bytecode_len)[2:].rjust(4, "0") + # deploy preamble: PUSH2 len, 0, DUP2, PUSH1 0a, 0, CODECOPY, RETURN + deploy_preamble = bytes.fromhex("61" + bytecode_len_hex + "3d81600a3d39f3") + deploy_bytecode = deploy_preamble + bytecode + + deployed_address = env._deploy(deploy_bytecode) try: - result = env.message_call(to=deployed_address, data=calldata, value=0) + result = env.message_call(to=deployed_address, data=calldata) return True, result - except ExecutionReverted as e: - # return revert data if available - return False, e.args[0] if e.args else b"" - except Exception: - # other errors like out of gas + except EvmError as e: return False, b"" def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: """Check equivalence between unoptimized and optimized execution.""" - unopt_bytecode = self.compile_to_bytecode(ctx) + # run MakeSSA on the original context first + for fn in ctx.functions.values(): + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() opt_ctx = self.run_passes(ctx) - opt_bytecode = self.compile_to_bytecode(opt_ctx) - unopt_success, unopt_output = self.execute_bytecode(unopt_bytecode, calldata, env) - opt_success, opt_output = self.execute_bytecode(opt_bytecode, calldata, env) + bytecode1 = self.compile_to_bytecode(ctx) + bytecode2 = self.compile_to_bytecode(opt_ctx) - assert ( - unopt_success == opt_success - ), f"Execution success mismatch: unopt={unopt_success}, opt={opt_success}" - assert ( - unopt_output == opt_output - ), f"Output mismatch: unopt={unopt_output.hex()}, opt={opt_output.hex()}" + succ1, out1 = self.execute_bytecode(bytecode1, calldata, env) + succ2, out2 = self.execute_bytecode(bytecode2, calldata, env) + + assert succ1 == succ2, (succ1, out1, succ2, out2) + assert out1 == out2, (succ1, out1, succ2, out2) @st.composite @@ -539,10 +540,9 @@ def test_memory_passes_fuzzing(pass_list, venom_data, env): hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") - if hasattr(ctx, "functions") and ctx.functions: - func = list(ctx.functions.values())[0] - hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") - hp.note(f"Calldata size: {len(calldata)} bytes") + func = list(ctx.functions.values())[0] + hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") + hp.note(f"Calldata size: {len(calldata)} bytes") checker = MemoryFuzzChecker(pass_list) checker.check_equivalence(ctx, calldata, env) From 554d34e0ca69138a62f79a63dc40ef8de36dc0c7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 20:53:56 +0200 Subject: [PATCH 10/24] lint --- tests/functional/venom/test_memory_fuzzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 3d5448082d..a38640ece1 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -13,7 +13,7 @@ import hypothesis.strategies as st import pytest -from tests.evm_backends.base_env import ExecutionReverted +from tests.evm_backends.base_env import EvmError from vyper.ir.compile_ir import assembly_to_evm from vyper.venom import SingleUseExpansion, VenomCompiler from vyper.venom.analysis import IRAnalysesCache @@ -475,7 +475,7 @@ def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, try: result = env.message_call(to=deployed_address, data=calldata) return True, result - except EvmError as e: + except EvmError: return False, b"" def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: From 3c700c56e23769a16c53749f2e8b256f351c19f3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 21:55:40 +0200 Subject: [PATCH 11/24] debug, analysis for available vars --- tests/evm_backends/base_env.py | 4 +- tests/evm_backends/pyevm_env.py | 2 +- tests/evm_backends/revm_env.py | 2 +- tests/functional/venom/test_memory_fuzzer.py | 175 +++++++++++++------ vyper/venom/basicblock.py | 6 +- vyper/venom/function.py | 1 + 6 files changed, 136 insertions(+), 54 deletions(-) diff --git a/tests/evm_backends/base_env.py b/tests/evm_backends/base_env.py index 77a0182664..0dd2e6dc97 100644 --- a/tests/evm_backends/base_env.py +++ b/tests/evm_backends/base_env.py @@ -61,7 +61,7 @@ def deploy(self, abi: list[dict], bytecode: bytes, value=0, *args, **kwargs): ctor = ABIFunction(ctor_abi, contract_name=factory._name) initcode += ctor.prepare_calldata(*args, **kwargs) - deployed_at = self._deploy(initcode, value) + deployed_at = self._deploy(initcode, value=value) address = to_checksum_address(deployed_at) return factory.at(self, address) @@ -181,7 +181,7 @@ def get_excess_blob_gas(self) -> Optional[int]: def set_excess_blob_gas(self, param): raise NotImplementedError # must be implemented by subclasses - def _deploy(self, code: bytes, value: int, gas: int | None = None) -> str: + def _deploy(self, code: bytes, value: int = 0, gas: int | None = None) -> str: raise NotImplementedError # must be implemented by subclasses @staticmethod diff --git a/tests/evm_backends/pyevm_env.py b/tests/evm_backends/pyevm_env.py index 6c510278a7..7e91780392 100644 --- a/tests/evm_backends/pyevm_env.py +++ b/tests/evm_backends/pyevm_env.py @@ -189,7 +189,7 @@ def get_excess_blob_gas(self) -> Optional[int]: def set_excess_blob_gas(self, param): self._context._excess_blob_gas = param - def _deploy(self, code: bytes, value: int, gas: int = None) -> str: + def _deploy(self, code: bytes, value: int = 0, gas: int = None) -> str: sender = _addr(self.deployer) target_address = self._generate_contract_address(sender) diff --git a/tests/evm_backends/revm_env.py b/tests/evm_backends/revm_env.py index d5a7570f96..1c3643a591 100644 --- a/tests/evm_backends/revm_env.py +++ b/tests/evm_backends/revm_env.py @@ -135,7 +135,7 @@ def get_blob_gasprice(self) -> Optional[int]: def set_excess_blob_gas(self, value): self._evm.env.block.excess_blob_gas = value - def _deploy(self, code: bytes, value: int, gas: int = None) -> str: + def _deploy(self, code: bytes, value: int = 0, gas: int = None) -> str: try: return self._evm.deploy(self.deployer, code, value, gas) except RuntimeError as e: diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index a38640ece1..a68e1d0f7f 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -15,12 +15,12 @@ from tests.evm_backends.base_env import EvmError from vyper.ir.compile_ir import assembly_to_evm -from vyper.venom import SingleUseExpansion, VenomCompiler +from vyper.venom import VenomCompiler from vyper.venom.analysis import IRAnalysesCache from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction -from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass +from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass, AssignElimination, SingleUseExpansion, SimplifyCFGPass MEMORY_OPS = ["mload", "mstore", "mcopy"] @@ -38,7 +38,7 @@ 0x9: "blake2f", } -MAX_MEMORY_SIZE = 4096 +MAX_MEMORY_SIZE = 256 MAX_BASIC_BLOCKS = 50 MAX_INSTRUCTIONS_PER_BLOCK = 50 MAX_LOOP_ITERATIONS = 12 @@ -89,6 +89,8 @@ def __init__(self): self.calldata_offset = MAX_MEMORY_SIZE self.available_vars = [] self.allocated_memory_slots = set() + # track which variables are available in each block + self.bb_available_vars = {} def get_next_variable(self) -> IRVariable: """Generate a new unique variable.""" @@ -118,17 +120,19 @@ def get_next_bb_label(self) -> IRLabel: self.bb_counter += 1 return IRLabel(f"bb{self.bb_counter}") - def get_random_variable(self, draw) -> IRVariable: + def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable: """Get a random available variable or create a new one.""" - if self.available_vars and draw(st.booleans()): - return draw(st.sampled_from(self.available_vars)) + available_in_bb = self.bb_available_vars.get(bb, []) + if available_in_bb and draw(st.booleans()): + return draw(st.sampled_from(available_in_bb)) else: return self.get_next_variable() - def get_memory_address(self, draw) -> IRVariable | IRLiteral: + def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" - if self.available_vars and draw(st.booleans()): - return draw(st.sampled_from(self.available_vars)) + available_in_bb = self.bb_available_vars.get(bb, []) + if available_in_bb and draw(st.booleans()): + return draw(st.sampled_from(available_in_bb)) if self.allocated_memory_slots and draw(st.booleans()): # bias towards addresses near existing allocations to create aliasing opportunities @@ -167,22 +171,30 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: """Generate and append a memory instruction to current basic block.""" op = draw(st.sampled_from(MEMORY_OPS)) + # track variables defined so far in this block + if bb not in fuzzer.bb_available_vars: + fuzzer.bb_available_vars[bb] = [] + if op == "mload": - addr = fuzzer.get_memory_address(draw) + addr = fuzzer.get_memory_address(draw, bb) result_var = bb.append_instruction("mload", addr) fuzzer.available_vars.append(result_var) + # add to variables available in this block + fuzzer.bb_available_vars[bb].append(result_var) elif op == "mstore": - if fuzzer.available_vars and draw(st.booleans()): - value = draw(st.sampled_from(fuzzer.available_vars)) + # can use variables defined earlier in this block + available_in_bb = fuzzer.bb_available_vars.get(bb, []) + if available_in_bb and draw(st.booleans()): + value = draw(st.sampled_from(available_in_bb)) else: value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256 - 1))) - addr = fuzzer.get_memory_address(draw) + addr = fuzzer.get_memory_address(draw, bb) bb.append_instruction("mstore", value, addr) elif op == "mcopy": - dest = fuzzer.get_memory_address(draw) - src = fuzzer.get_memory_address(draw) + dest = fuzzer.get_memory_address(draw, bb) + src = fuzzer.get_memory_address(draw, bb) length = draw(copy_length()) bb.append_instruction("mcopy", dest, src, IRLiteral(length)) @@ -214,6 +226,11 @@ def control_flow_graph(draw, basic_blocks): while remaining_blocks: source = draw(st.sampled_from(reachable_blocks)) + + # we have already visited it + if source in cfg: + continue + target = draw(st.sampled_from(remaining_blocks)) # target is now reachable, but it may not be in cfg yet @@ -269,8 +286,8 @@ def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: precompile_addr = draw(st.sampled_from(list(PRECOMPILES.keys()))) precompile_name = PRECOMPILES[precompile_addr] - input_ofst = fuzzer.get_memory_address(draw) - output_ofst = fuzzer.get_memory_address(draw) + input_ofst = fuzzer.get_memory_address(draw, bb) + output_ofst = fuzzer.get_memory_address(draw, bb) if precompile_name == "ecrecover": input_size = IRLiteral(128) # v, r, s, hash @@ -378,9 +395,63 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: for addr in counter_addrs: entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) + # first pass: generate instructions for each block for bb in basic_blocks: draw(basic_block_instructions(fuzzer, bb)) - + + # compute available variables at each block based on CFG + # a variable is available if it's defined in ALL paths to that block + bb_incoming_available = {} + + # collect variables defined in each block (already in fuzzer.bb_available_vars) + # and initialize incoming available sets + for bb in basic_blocks: + bb_incoming_available[bb] = set() + + # propagate available variables through CFG + # entry block starts with empty set + bb_incoming_available[basic_blocks[0]] = set() + + # iteratively propagate until fixpoint + changed = True + while changed: + changed = False + for bb in basic_blocks: + # find predecessors + preds = [] + for pred_bb in basic_blocks: + pred_type = cfg[pred_bb] + if isinstance(pred_type, _JumpBB) and pred_type.target == bb: + preds.append(pred_bb) + elif isinstance(pred_type, _BranchBB) and (pred_type.target1 == bb or pred_type.target2 == bb): + preds.append(pred_bb) + + if preds: + # available vars = intersection of all predecessors' available + defined vars + new_available = None + for pred in preds: + # variables available at end of predecessor = incoming + defined in pred + pred_defined = set(fuzzer.bb_available_vars.get(pred, [])) + pred_avail = bb_incoming_available[pred] | pred_defined + + if new_available is None: + new_available = pred_avail + else: + new_available = new_available & pred_avail + + if new_available != bb_incoming_available[bb]: + bb_incoming_available[bb] = new_available + changed = True + + # update fuzzer's bb_available_vars to include incoming variables + for bb in basic_blocks: + incoming = list(bb_incoming_available[bb]) + existing = fuzzer.bb_available_vars.get(bb, []) + # incoming vars are available at the start, then vars defined in the block + fuzzer.bb_available_vars[bb] = incoming + existing + + # second pass: add terminators using available variables + for bb in basic_blocks: bb_type = cfg[bb] if isinstance(bb_type, _ReturnBB): @@ -390,7 +461,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: bb.append_instruction("jmp", bb_type.target.label) elif isinstance(bb_type, _BranchBB): - cond_var = fuzzer.get_random_variable(draw) + cond_var = fuzzer.get_random_variable(draw, bb) # get bottom bit, for bias reasons cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) @@ -410,7 +481,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: cond_var = bb.append_instruction("and", counter_ok, cond_var) - bb.append_instruction("jnz", bb_type.target1.label, bb_type.target2.label, cond_var) + bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label) else: raise Exception() # unreachable @@ -423,20 +494,19 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: class MemoryFuzzChecker: """A pluggable checker for memory passes using fuzzing.""" - def __init__(self, passes: list[type], post_passes: list[type] = None): + def __init__(self, passes: list[type]): self.passes = passes - self.post_passes = post_passes or [] def compile_to_bytecode(self, ctx: IRContext) -> bytes: """Compile Venom IR context to EVM bytecode.""" # assumes MakeSSA has already been run for fn in ctx.functions.values(): ac = IRAnalysesCache(fn) + SimplifyCFGPass(ac, fn).run_pass() SingleUseExpansion(ac, fn).run_pass() - # Compile to assembly and then to bytecode compiler = VenomCompiler([ctx]) - asm = compiler.generate_evm(no_optimize=False) + asm = compiler.generate_evm() bytecode, _ = assembly_to_evm(asm) return bytecode @@ -455,10 +525,6 @@ def run_passes(self, ctx: IRContext) -> IRContext: pass_obj = pass_class(ac, fn) pass_obj.run_pass() - for pass_class in self.post_passes: - pass_obj = pass_class(ac, fn) - pass_obj.run_pass() - return optimized_ctx def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, bytes]: @@ -484,6 +550,7 @@ def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: for fn in ctx.functions.values(): ac = IRAnalysesCache(fn) MakeSSA(ac, fn).run_pass() + AssignElimination(ac, fn).run_pass() opt_ctx = self.run_passes(ctx) @@ -507,35 +574,43 @@ def venom_with_calldata(draw): # Test with memory-related passes @pytest.mark.fuzzing -@pytest.mark.parametrize( - "pass_list", - [ - # Test individual memory passes - [LoadElimination], - [DeadStoreElimination], - [MemMergePass], - # Test combinations - [LoadElimination, DeadStoreElimination], - [DeadStoreElimination, LoadElimination], - [LoadElimination, MemMergePass], - ], -) -@hp.given(venom_with_calldata()) +#@pytest.mark.parametrize( +# "pass_list", +# [ +# # Test individual memory passes +# [MemMergePass], +# [LoadElimination], +# [DeadStoreElimination], +# # Test combinations +# [LoadElimination, DeadStoreElimination], +# [DeadStoreElimination, LoadElimination], +# [LoadElimination, MemMergePass], +# ], +#) +@hp.given(venom_data=venom_with_calldata()) + @hp.settings( - max_examples=100, + max_examples=1000, suppress_health_check=( hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow, - hp.HealthCheck.filter_too_much, ), deadline=None, + phases=( + hp.Phase.explicit, + hp.Phase.reuse, + hp.Phase.generate, + hp.Phase.target, + # Phase.shrink, # can force long waiting for examples + ), ) -def test_memory_passes_fuzzing(pass_list, venom_data, env): +def test_memory_passes_fuzzing(venom_data, env): """ Property-based test for memory optimization passes. Tests that memory passes preserve semantics by comparing EVM execution results. """ + pass_list = [MemMergePass] ctx, calldata = venom_data hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") @@ -543,6 +618,7 @@ def test_memory_passes_fuzzing(pass_list, venom_data, env): func = list(ctx.functions.values())[0] hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") hp.note(f"Calldata size: {len(calldata)} bytes") + hp.note(str(ctx)) checker = MemoryFuzzChecker(pass_list) checker.check_equivalence(ctx, calldata, env) @@ -550,17 +626,18 @@ def test_memory_passes_fuzzing(pass_list, venom_data, env): def generate_sample_ir() -> IRContext: """Generate a sample IR for manual inspection.""" - ctx = venom_function_with_memory_ops().example() + ctx, _ = venom_function_with_memory_ops().example() return ctx if __name__ == "__main__": ctx = generate_sample_ir() - func = list(ctx.functions.values())[0] - print(f"Generated function with {func.num_basic_blocks} basic blocks:") - print(func) + #func = list(ctx.functions.values())[0] + #print(func) checker = MemoryFuzzChecker([MemMergePass]) checker.run_passes(ctx) print(ctx) + bytecode = checker.compile_to_bytecode(ctx) + print(bytecode.hex()) diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py index 27d1e2c7fd..4a2d2f84b4 100644 --- a/vyper/venom/basicblock.py +++ b/vyper/venom/basicblock.py @@ -668,7 +668,11 @@ def copy(self) -> IRBasicBlock: def __repr__(self) -> str: printer = ir_printer.get() - s = f"{repr(self.label)}: ; OUT={[bb.label for bb in self.out_bbs]}\n" + s = f"{repr(self.label)}:" + if self.is_terminated: + s += f" ; OUT={[bb.label for bb in self.out_bbs]}" + s += "\n" + if printer and hasattr(printer, "_pre_block"): s += printer._pre_block(self) for inst in self.instructions: diff --git a/vyper/venom/function.py b/vyper/venom/function.py index 3ad63b207a..d9dc2e07b2 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -161,6 +161,7 @@ def error_msg(self) -> Optional[str]: def copy(self): new = IRFunction(self.name) + new.clear_basic_blocks() for bb in self.get_basic_blocks(): new_bb = bb.copy() new.append_basic_block(new_bb) From 1b52d0a795a258922f684ca16cf03a6a276186dd Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 22:34:47 +0200 Subject: [PATCH 12/24] fix weakref bug --- vyper/venom/function.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vyper/venom/function.py b/vyper/venom/function.py index d9dc2e07b2..7fb483baa0 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -66,6 +66,7 @@ def append_basic_block(self, bb: IRBasicBlock): assert isinstance(bb, IRBasicBlock), bb assert bb.label.name not in self._basic_block_dict, bb.label self._basic_block_dict[bb.label.name] = bb + bb.parent = self # ensure parent is updated def remove_basic_block(self, bb: IRBasicBlock): assert isinstance(bb, IRBasicBlock), bb From 0ff2b9b43992e50ab18ef5ec7f1b360c71b40462 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 23:12:44 +0200 Subject: [PATCH 13/24] fix assumption in venom_to_assembly: entry block has no predecessors --- vyper/venom/venom_to_assembly.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index 4c5a2bfcda..f4b28a3bce 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -342,6 +342,8 @@ def _generate_evm_for_basicblock_r( fn = basicblock.parent if basicblock == fn.entry: + # Entry block should not have predecessors (no back edges to entry) + assert len(self.cfg.cfg_in(basicblock)) == 0 self._prepare_stack_for_function(asm, fn, stack) if len(self.cfg.cfg_in(basicblock)) == 1: From a67d12d10c0e878e3990669d1fd4cfa669aee699 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 23:13:10 +0200 Subject: [PATCH 14/24] allocate variables after the fact, rather than finding available variables --- tests/functional/venom/test_memory_fuzzer.py | 117 +++++++++---------- 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index a68e1d0f7f..d070d01e81 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -78,6 +78,11 @@ def has_back_edge(self) -> bool: return self.counter_addr is not None +class SymbolicVar(IRVariable): + """Placeholder for a variable that will be resolved later""" + pass + + class MemoryFuzzer: """Generates random Venom IR with memory operations using IRBasicBlock API.""" @@ -91,6 +96,9 @@ def __init__(self): self.allocated_memory_slots = set() # track which variables are available in each block self.bb_available_vars = {} + # symbolic variable tracking + self.symbolic_counter = 0 + self.symbolic_mapping = {} # SymbolicVar -> IRVariable def get_next_variable(self) -> IRVariable: """Generate a new unique variable.""" @@ -98,6 +106,35 @@ def get_next_variable(self) -> IRVariable: var = IRVariable(f"v{self.variable_counter}") self.available_vars.append(var) return var + + def fresh_symbolic(self) -> SymbolicVar: + """Create a new symbolic variable""" + self.symbolic_counter += 1 + return SymbolicVar(f"%sym_{self.symbolic_counter}") + + def resolve_all_variables(self): + """After building all blocks, replace symbolic vars with real ones""" + # Map all symbolic vars to real variables + for bb in self.function.get_basic_blocks(): + for inst in bb.instructions: + # Handle output + if inst.output and isinstance(inst.output, SymbolicVar): + if inst.output not in self.symbolic_mapping: + self.symbolic_mapping[inst.output] = self.get_next_variable() + inst.output = self.symbolic_mapping[inst.output] + + # Handle inputs + new_operands = [] + for op in inst.operands: + if isinstance(op, SymbolicVar): + if op not in self.symbolic_mapping: + # This symbolic var was never defined as output + # Create a fresh variable for it + self.symbolic_mapping[op] = self.get_next_variable() + new_operands.append(self.symbolic_mapping[op]) + else: + new_operands.append(op) + inst.operands = new_operands def ensure_all_vars_have_values(self) -> None: """Ensure all available variables have values by using calldataload for unassigned ones.""" @@ -120,13 +157,13 @@ def get_next_bb_label(self) -> IRLabel: self.bb_counter += 1 return IRLabel(f"bb{self.bb_counter}") - def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable: + def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable | SymbolicVar: """Get a random available variable or create a new one.""" available_in_bb = self.bb_available_vars.get(bb, []) if available_in_bb and draw(st.booleans()): return draw(st.sampled_from(available_in_bb)) else: - return self.get_next_variable() + return self.fresh_symbolic() def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" @@ -177,8 +214,8 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: if op == "mload": addr = fuzzer.get_memory_address(draw, bb) - result_var = bb.append_instruction("mload", addr) - fuzzer.available_vars.append(result_var) + result_var = fuzzer.fresh_symbolic() + bb.append_instruction("mload", addr, ret=result_var) # add to variables available in this block fuzzer.bb_available_vars[bb].append(result_var) @@ -202,6 +239,7 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: raise ValueError("unreachable") + @st.composite def control_flow_graph(draw, basic_blocks): """ @@ -209,6 +247,7 @@ def control_flow_graph(draw, basic_blocks): 1. All blocks are reachable from entry 2. No infinite loops (all loops terminate within 12 iterations) 3. Proper use of jump and branch instructions + 4. No back edges to entry block """ cfg: dict[IRBasicBlock, _BBType] = {} @@ -219,6 +258,9 @@ def control_flow_graph(draw, basic_blocks): forward_targets = {} for i, bb in enumerate(basic_blocks): forward_targets[bb] = basic_blocks[i + 1 :] + + # All blocks except entry (to prevent back edges to entry) + non_entry_blocks = basic_blocks[1:] # create a spanning tree to ensure all blocks are reachable remaining_blocks = basic_blocks[1:] # exclude entry block @@ -240,7 +282,9 @@ def control_flow_graph(draw, basic_blocks): if draw(st.booleans()): cfg[source] = _JumpBB(target=target) else: - other_target = draw(st.sampled_from(basic_blocks)) + # For branches, allow any block as the other target except entry + # (target is already guaranteed to be forward) + other_target = draw(st.sampled_from(non_entry_blocks)) cfg[source] = _BranchBB(target1=target, target2=other_target) # classify remaining blocks that were not handled during spanning @@ -257,8 +301,9 @@ def control_flow_graph(draw, basic_blocks): target = draw(st.sampled_from(forward_targets[bb])) cfg[bb] = _JumpBB(target=target) else: # branch - target1 = draw(st.sampled_from(basic_blocks)) - target2 = draw(st.sampled_from(basic_blocks)) + # Choose targets, but never allow entry as a target + target1 = draw(st.sampled_from(non_entry_blocks)) + target2 = draw(st.sampled_from(non_entry_blocks)) is_back_edge1 = basic_blocks.index(target1) <= basic_blocks.index(bb) is_back_edge2 = basic_blocks.index(target2) <= basic_blocks.index(bb) @@ -395,62 +440,11 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: for addr in counter_addrs: entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) - # first pass: generate instructions for each block + # generate instructions for each block for bb in basic_blocks: draw(basic_block_instructions(fuzzer, bb)) - # compute available variables at each block based on CFG - # a variable is available if it's defined in ALL paths to that block - bb_incoming_available = {} - - # collect variables defined in each block (already in fuzzer.bb_available_vars) - # and initialize incoming available sets - for bb in basic_blocks: - bb_incoming_available[bb] = set() - - # propagate available variables through CFG - # entry block starts with empty set - bb_incoming_available[basic_blocks[0]] = set() - - # iteratively propagate until fixpoint - changed = True - while changed: - changed = False - for bb in basic_blocks: - # find predecessors - preds = [] - for pred_bb in basic_blocks: - pred_type = cfg[pred_bb] - if isinstance(pred_type, _JumpBB) and pred_type.target == bb: - preds.append(pred_bb) - elif isinstance(pred_type, _BranchBB) and (pred_type.target1 == bb or pred_type.target2 == bb): - preds.append(pred_bb) - - if preds: - # available vars = intersection of all predecessors' available + defined vars - new_available = None - for pred in preds: - # variables available at end of predecessor = incoming + defined in pred - pred_defined = set(fuzzer.bb_available_vars.get(pred, [])) - pred_avail = bb_incoming_available[pred] | pred_defined - - if new_available is None: - new_available = pred_avail - else: - new_available = new_available & pred_avail - - if new_available != bb_incoming_available[bb]: - bb_incoming_available[bb] = new_available - changed = True - - # update fuzzer's bb_available_vars to include incoming variables - for bb in basic_blocks: - incoming = list(bb_incoming_available[bb]) - existing = fuzzer.bb_available_vars.get(bb, []) - # incoming vars are available at the start, then vars defined in the block - fuzzer.bb_available_vars[bb] = incoming + existing - - # second pass: add terminators using available variables + # add terminators for bb in basic_blocks: bb_type = cfg[bb] @@ -486,6 +480,9 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: else: raise Exception() # unreachable + # resolve all symbolic variables to real ones + fuzzer.resolve_all_variables() + fuzzer.ensure_all_vars_have_values() return fuzzer.ctx, fuzzer.calldata_offset From 798055b85b9ca01556e90200bf3d418afcd7180b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sun, 8 Jun 2025 23:14:25 +0200 Subject: [PATCH 15/24] lint --- tests/functional/venom/test_memory_fuzzer.py | 44 +++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index d070d01e81..710efce676 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -20,7 +20,15 @@ from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable from vyper.venom.context import IRContext from vyper.venom.function import IRFunction -from vyper.venom.passes import DeadStoreElimination, LoadElimination, MakeSSA, MemMergePass, AssignElimination, SingleUseExpansion, SimplifyCFGPass +from vyper.venom.passes import ( + AssignElimination, + DeadStoreElimination, + LoadElimination, + MakeSSA, + MemMergePass, + SimplifyCFGPass, + SingleUseExpansion, +) MEMORY_OPS = ["mload", "mstore", "mcopy"] @@ -80,6 +88,7 @@ def has_back_edge(self) -> bool: class SymbolicVar(IRVariable): """Placeholder for a variable that will be resolved later""" + pass @@ -106,12 +115,12 @@ def get_next_variable(self) -> IRVariable: var = IRVariable(f"v{self.variable_counter}") self.available_vars.append(var) return var - + def fresh_symbolic(self) -> SymbolicVar: """Create a new symbolic variable""" self.symbolic_counter += 1 return SymbolicVar(f"%sym_{self.symbolic_counter}") - + def resolve_all_variables(self): """After building all blocks, replace symbolic vars with real ones""" # Map all symbolic vars to real variables @@ -122,7 +131,7 @@ def resolve_all_variables(self): if inst.output not in self.symbolic_mapping: self.symbolic_mapping[inst.output] = self.get_next_variable() inst.output = self.symbolic_mapping[inst.output] - + # Handle inputs new_operands = [] for op in inst.operands: @@ -239,7 +248,6 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: raise ValueError("unreachable") - @st.composite def control_flow_graph(draw, basic_blocks): """ @@ -258,7 +266,7 @@ def control_flow_graph(draw, basic_blocks): forward_targets = {} for i, bb in enumerate(basic_blocks): forward_targets[bb] = basic_blocks[i + 1 :] - + # All blocks except entry (to prevent back edges to entry) non_entry_blocks = basic_blocks[1:] @@ -443,7 +451,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: # generate instructions for each block for bb in basic_blocks: draw(basic_block_instructions(fuzzer, bb)) - + # add terminators for bb in basic_blocks: bb_type = cfg[bb] @@ -482,7 +490,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: # resolve all symbolic variables to real ones fuzzer.resolve_all_variables() - + fuzzer.ensure_all_vars_have_values() return fuzzer.ctx, fuzzer.calldata_offset @@ -571,7 +579,7 @@ def venom_with_calldata(draw): # Test with memory-related passes @pytest.mark.fuzzing -#@pytest.mark.parametrize( +# @pytest.mark.parametrize( # "pass_list", # [ # # Test individual memory passes @@ -583,23 +591,19 @@ def venom_with_calldata(draw): # [DeadStoreElimination, LoadElimination], # [LoadElimination, MemMergePass], # ], -#) +# ) @hp.given(venom_data=venom_with_calldata()) - @hp.settings( max_examples=1000, - suppress_health_check=( - hp.HealthCheck.data_too_large, - hp.HealthCheck.too_slow, - ), + suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow), deadline=None, - phases=( + phases=( hp.Phase.explicit, hp.Phase.reuse, hp.Phase.generate, hp.Phase.target, - # Phase.shrink, # can force long waiting for examples - ), + # Phase.shrink, # can force long waiting for examples + ), ) def test_memory_passes_fuzzing(venom_data, env): """ @@ -630,8 +634,8 @@ def generate_sample_ir() -> IRContext: if __name__ == "__main__": ctx = generate_sample_ir() - #func = list(ctx.functions.values())[0] - #print(func) + # func = list(ctx.functions.values())[0] + # print(func) checker = MemoryFuzzChecker([MemMergePass]) checker.run_passes(ctx) From 69383a89153c92240cb63861d0d86751f222bf29 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 9 Jun 2025 10:59:38 +0200 Subject: [PATCH 16/24] fix variable allocation --- tests/functional/venom/test_memory_fuzzer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 710efce676..122daa4bbf 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -98,7 +98,6 @@ class MemoryFuzzer: def __init__(self): self.ctx = IRContext() self.function = None - self.variable_counter = 0 self.bb_counter = 0 self.calldata_offset = MAX_MEMORY_SIZE self.available_vars = [] @@ -110,9 +109,9 @@ def __init__(self): self.symbolic_mapping = {} # SymbolicVar -> IRVariable def get_next_variable(self) -> IRVariable: - """Generate a new unique variable.""" - self.variable_counter += 1 - var = IRVariable(f"v{self.variable_counter}") + """Generate a new unique variable using the function's allocator.""" + assert self.function is not None, "Function must be set before allocating variables" + var = self.function.get_next_variable() self.available_vars.append(var) return var @@ -493,6 +492,10 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: fuzzer.ensure_all_vars_have_values() + # freshen variable names for easier debugging + for fn in fuzzer.ctx.functions.values(): + fn.freshen_varnames() + return fuzzer.ctx, fuzzer.calldata_offset From f6b9bdd9bd99fdb311df0a9683248661dcbe6d08 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 9 Jun 2025 11:33:30 +0200 Subject: [PATCH 17/24] simplify symbolic vars --- tests/functional/venom/test_memory_fuzzer.py | 24 +++++++------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 122daa4bbf..851891f201 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -106,7 +106,6 @@ def __init__(self): self.bb_available_vars = {} # symbolic variable tracking self.symbolic_counter = 0 - self.symbolic_mapping = {} # SymbolicVar -> IRVariable def get_next_variable(self) -> IRVariable: """Generate a new unique variable using the function's allocator.""" @@ -122,26 +121,19 @@ def fresh_symbolic(self) -> SymbolicVar: def resolve_all_variables(self): """After building all blocks, replace symbolic vars with real ones""" - # Map all symbolic vars to real variables + # resolve "symbolic" vars to real variables + symbolic_mapping = defaultdict(self.get_next_variable) for bb in self.function.get_basic_blocks(): for inst in bb.instructions: - # Handle output + # remap all "symbolic" variables if inst.output and isinstance(inst.output, SymbolicVar): - if inst.output not in self.symbolic_mapping: - self.symbolic_mapping[inst.output] = self.get_next_variable() - inst.output = self.symbolic_mapping[inst.output] + inst.output = symbolic_mapping[inst.output] - # Handle inputs new_operands = [] for op in inst.operands: if isinstance(op, SymbolicVar): - if op not in self.symbolic_mapping: - # This symbolic var was never defined as output - # Create a fresh variable for it - self.symbolic_mapping[op] = self.get_next_variable() - new_operands.append(self.symbolic_mapping[op]) - else: - new_operands.append(op) + op = symbolic_mapping[op] + new_operands.append(op) inst.operands = new_operands def ensure_all_vars_have_values(self) -> None: @@ -244,7 +236,7 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: bb.append_instruction("mcopy", dest, src, IRLiteral(length)) else: - raise ValueError("unreachable") + raise Exception("unreachable") @st.composite @@ -597,7 +589,7 @@ def venom_with_calldata(draw): # ) @hp.given(venom_data=venom_with_calldata()) @hp.settings( - max_examples=1000, + max_examples=10, suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow), deadline=None, phases=( From dd8f3659d32ad72f4c7a87e09968a8b6f9bde6f6 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 9 Jun 2025 12:04:35 +0200 Subject: [PATCH 18/24] update back edge logic --- tests/functional/venom/test_memory_fuzzer.py | 33 ++++++++++++++------ vyper/venom/venom_to_assembly.py | 2 +- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 851891f201..a6bd545055 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -5,7 +5,7 @@ memory optimization passes. It uses the IRBasicBlock API directly and can be plugged with any Venom passes. """ - +from collections import defaultdict from dataclasses import dataclass from typing import Optional @@ -82,7 +82,7 @@ class _BranchBB(_BBType): counter_addr: Optional[int] = None @property - def has_back_edge(self) -> bool: + def needs_loop_counter(self) -> bool: return self.counter_addr is not None @@ -284,12 +284,19 @@ def control_flow_graph(draw, basic_blocks): # For branches, allow any block as the other target except entry # (target is already guaranteed to be forward) other_target = draw(st.sampled_from(non_entry_blocks)) + + is_back_edge = basic_blocks.index(other_target) <= basic_blocks.index(source) + # counter_addr = loop_counter_addr if is_back_edge else None + + # if other_target is the back edge, swap so back edge is always target1 + if is_back_edge: + other_target, target = target, other_target cfg[source] = _BranchBB(target1=target, target2=other_target) # classify remaining blocks that were not handled during spanning # tree construction. - loop_counter_addr = MAX_MEMORY_SIZE + loop_counter_addr = MAX_MEMORY_SIZE for bb in basic_blocks: if bb in cfg: continue @@ -313,6 +320,11 @@ def control_flow_graph(draw, basic_blocks): is_back_edge2 = False contains_back_edge = is_back_edge1 or is_back_edge2 + + # swap targets so target2 is always a forward edge + if is_back_edge2 and not is_back_edge1: + target1, target2 = target2, target1 + counter_addr = loop_counter_addr if contains_back_edge else None cfg[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr) @@ -458,22 +470,19 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: # get bottom bit, for bias reasons cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) - if bb_type.has_back_edge: + if bb_type.needs_loop_counter: loop_counter_addr = IRLiteral(bb_type.counter_addr) counter = bb.append_instruction("mload", loop_counter_addr) incr_counter = bb.append_instruction("add", counter, IRLiteral(1)) bb.append_instruction("mstore", incr_counter, loop_counter_addr) - # exit loop when counter >= MAX_LOOP_ITERATIONS - # (note we are guaranteed that second target provides forward - # progress) max_iterations = IRLiteral(MAX_LOOP_ITERATIONS) - # counter < iterbound counter_ok = bb.append_instruction("lt", counter, max_iterations) cond_var = bb.append_instruction("and", counter_ok, cond_var) + # when there is a back edge, target2 is always the forward edge bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label) else: @@ -504,6 +513,10 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes: ac = IRAnalysesCache(fn) SimplifyCFGPass(ac, fn).run_pass() SingleUseExpansion(ac, fn).run_pass() + MakeSSA(ac, fn).run_pass() + fn.freshen_varnames() + + hp.note(str(ctx)) compiler = VenomCompiler([ctx]) asm = compiler.generate_evm() @@ -551,8 +564,10 @@ def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: ac = IRAnalysesCache(fn) MakeSSA(ac, fn).run_pass() AssignElimination(ac, fn).run_pass() + hp.note("UNOPTIMIZED: " + str(ctx)) opt_ctx = self.run_passes(ctx) + hp.note("OPTIMIZED: " + str(opt_ctx)) bytecode1 = self.compile_to_bytecode(ctx) bytecode2 = self.compile_to_bytecode(opt_ctx) @@ -589,7 +604,7 @@ def venom_with_calldata(draw): # ) @hp.given(venom_data=venom_with_calldata()) @hp.settings( - max_examples=10, + max_examples=1000, suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow), deadline=None, phases=( diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py index f4b28a3bce..b1ffd24baf 100644 --- a/vyper/venom/venom_to_assembly.py +++ b/vyper/venom/venom_to_assembly.py @@ -279,7 +279,7 @@ def _emit_input_operands( self.dup_op(assembly, stack, op) # guaranteed by store expansion - assert op not in seen, (op, seen) + assert op not in seen, (op, inst) seen.add(op) def _prepare_stack_for_function(self, asm, fn: IRFunction, stack: StackModel): From 27a09ca77f69ac5dfd1d6687ef9eb71a43d28339 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Jun 2025 12:34:01 +0200 Subject: [PATCH 19/24] add cfg normalization pass --- tests/functional/venom/test_memory_fuzzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index a6bd545055..1c86c8d2e2 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -21,6 +21,7 @@ from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.passes import ( + CFGNormalization, AssignElimination, DeadStoreElimination, LoadElimination, @@ -514,6 +515,7 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes: SimplifyCFGPass(ac, fn).run_pass() SingleUseExpansion(ac, fn).run_pass() MakeSSA(ac, fn).run_pass() + CFGNormalization(ac, fn).run_pass() fn.freshen_varnames() hp.note(str(ctx)) From cfa0d1d9ab144edd8dd40ca010fc71bf058b2b79 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Jun 2025 12:34:10 +0200 Subject: [PATCH 20/24] add debug --- tests/functional/venom/test_memory_fuzzer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 1c86c8d2e2..ec696f7128 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -606,7 +606,7 @@ def venom_with_calldata(draw): # ) @hp.given(venom_data=venom_with_calldata()) @hp.settings( - max_examples=1000, + max_examples=50, suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow), deadline=None, phases=( @@ -616,6 +616,7 @@ def venom_with_calldata(draw): hp.Phase.target, # Phase.shrink, # can force long waiting for examples ), + verbosity=hp.Verbosity.debug, ) def test_memory_passes_fuzzing(venom_data, env): """ From 215efd1efb52b3075f14b074fd74be2d38f7b76c Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Jun 2025 16:51:50 +0200 Subject: [PATCH 21/24] update variable definitions --- tests/functional/venom/test_memory_fuzzer.py | 211 +++++++++++++++++-- 1 file changed, 192 insertions(+), 19 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index ec696f7128..72cc52dd66 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -21,8 +21,8 @@ from vyper.venom.context import IRContext from vyper.venom.function import IRFunction from vyper.venom.passes import ( - CFGNormalization, AssignElimination, + CFGNormalization, DeadStoreElimination, LoadElimination, MakeSSA, @@ -105,6 +105,8 @@ def __init__(self): self.allocated_memory_slots = set() # track which variables are available in each block self.bb_available_vars = {} + # variables defined in entry block (available everywhere) + self.entry_vars = set() # symbolic variable tracking self.symbolic_counter = 0 @@ -120,23 +122,181 @@ def fresh_symbolic(self) -> SymbolicVar: self.symbolic_counter += 1 return SymbolicVar(f"%sym_{self.symbolic_counter}") - def resolve_all_variables(self): + def compute_reachable_blocks( + self, cfg: dict[IRBasicBlock, _BBType] + ) -> dict[IRBasicBlock, set[IRBasicBlock]]: + """Compute which blocks are reachable from each block.""" + reachable_from = {} + + def get_successors(bb: IRBasicBlock) -> list[IRBasicBlock]: + """Get successor blocks based on CFG.""" + bb_type = cfg.get(bb) + if isinstance(bb_type, _JumpBB): + return [bb_type.target] + elif isinstance(bb_type, _BranchBB): + return [bb_type.target1, bb_type.target2] + else: + return [] + + def compute_reachable( + block: IRBasicBlock, visited: set[IRBasicBlock] = None + ) -> set[IRBasicBlock]: + if visited is None: + visited = set() + if block in visited: + return set() + visited.add(block) + + result = {block} + for succ in get_successors(block): + result.update(compute_reachable(succ, visited)) + return result + + for bb in self.function.get_basic_blocks(): + reachable_from[bb] = compute_reachable(bb) + + return reachable_from + + def compute_variable_availability( + self, cfg: dict[IRBasicBlock, _BBType] + ) -> dict[IRBasicBlock, set[IRVariable]]: + """Compute which variables are available at each block after resolution.""" + # First, compute reachability + reachable_from = self.compute_reachable_blocks(cfg) + + # Find where each variable is defined + var_defs = {} # var -> defining block + for bb in self.function.get_basic_blocks(): + for inst in bb.instructions: + if inst.output and isinstance(inst.output, IRVariable): + var_defs[inst.output] = bb + + # Compute availability + available_at = {} # block -> set of available vars + for bb in self.function.get_basic_blocks(): + available_at[bb] = set() + + # Variables are available in their defining block and all reachable blocks + for var, def_block in var_defs.items(): + for bb in reachable_from[def_block]: + available_at[bb].add(var) + + # Entry block variables are available everywhere + entry_bb = self.function.entry + for inst in entry_bb.instructions: + if inst.output and isinstance(inst.output, IRVariable): + for bb in self.function.get_basic_blocks(): + available_at[bb].add(inst.output) + + return available_at + + def propagate_available_vars(self, cfg: dict[IRBasicBlock, _BBType]) -> None: + """Update bb_available_vars to include variables from predecessors.""" + # Build predecessor map + predecessors = defaultdict(list) + for bb, bb_type in cfg.items(): + if isinstance(bb_type, _JumpBB): + predecessors[bb_type.target].append(bb) + elif isinstance(bb_type, _BranchBB): + predecessors[bb_type.target1].append(bb) + predecessors[bb_type.target2].append(bb) + + # Initialize with local definitions + for bb in self.function.get_basic_blocks(): + if bb not in self.bb_available_vars: + self.bb_available_vars[bb] = [] + + # Add entry block variables to all blocks + entry_vars = self.bb_available_vars.get(self.function.entry, []) + for bb in self.function.get_basic_blocks(): + if bb != self.function.entry: + # Add entry vars that aren't already there + for var in entry_vars: + if var not in self.bb_available_vars[bb]: + self.bb_available_vars[bb].append(var) + + # Fixed-point iteration to propagate variables + changed = True + while changed: + changed = False + for bb in self.function.get_basic_blocks(): + if bb == self.function.entry: + continue + + # Variables available at block entry = intersection of predecessor outputs + if bb in predecessors and predecessors[bb]: + # Start with variables from first predecessor + available = set(self.bb_available_vars.get(predecessors[bb][0], [])) + + # Intersect with other predecessors + for pred in predecessors[bb][1:]: + available &= set(self.bb_available_vars.get(pred, [])) + + # Add available vars that aren't already tracked + for var in available: + if var not in self.bb_available_vars[bb]: + self.bb_available_vars[bb].append(var) + changed = True + + def resolve_all_variables(self, cfg: dict[IRBasicBlock, _BBType]): """After building all blocks, replace symbolic vars with real ones""" - # resolve "symbolic" vars to real variables - symbolic_mapping = defaultdict(self.get_next_variable) + # Compute which variables are available at each block + available_at = self.compute_variable_availability(cfg) + + # Track which real variable each symbolic var maps to globally + # We need a global mapping because symbolic vars can be used across blocks + symbolic_mapping = {} + + # Track variables we've allocated but not yet assigned + unassigned_vars = set() + + # First pass: resolve all output variables for bb in self.function.get_basic_blocks(): for inst in bb.instructions: - # remap all "symbolic" variables if inst.output and isinstance(inst.output, SymbolicVar): - inst.output = symbolic_mapping[inst.output] + # Create a new variable for outputs + symbolic_var = inst.output + real_var = self.get_next_variable() + inst.output = real_var + symbolic_mapping[symbolic_var] = real_var + # This variable is now available in this block and all reachable blocks + + # Second pass: resolve all operand variables based on what's available + for bb in self.function.get_basic_blocks(): + available_vars = list(available_at.get(bb, set())) + # Collect instructions that need calldataloads inserted before them + insertions = [] # List of (index, instruction) pairs + + for i, inst in enumerate(bb.instructions): new_operands = [] for op in inst.operands: if isinstance(op, SymbolicVar): - op = symbolic_mapping[op] + # Check if we already resolved this symbolic var + if op in symbolic_mapping: + real_var = symbolic_mapping[op] + else: + # This symbolic var hasn't been resolved yet + # It must be used before being defined as an output + # Create a fresh variable and initialize it + real_var = self.get_next_variable() + symbolic_mapping[op] = real_var + + # Schedule calldataload insertion before this instruction + load_inst = IRInstruction( + "calldataload", [IRLiteral(self.calldata_offset)], real_var + ) + insertions.append((i, load_inst)) + self.calldata_offset += 32 + + op = real_var new_operands.append(op) inst.operands = new_operands + # Insert calldataloads in reverse order to preserve indices + for idx, load_inst in reversed(insertions): + bb.insert_instruction(load_inst, index=idx) + def ensure_all_vars_have_values(self) -> None: """Ensure all available variables have values by using calldataload for unassigned ones.""" assigned_vars = set() @@ -160,17 +320,17 @@ def get_next_bb_label(self) -> IRLabel: def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable | SymbolicVar: """Get a random available variable or create a new one.""" - available_in_bb = self.bb_available_vars.get(bb, []) - if available_in_bb and draw(st.booleans()): - return draw(st.sampled_from(available_in_bb)) - else: - return self.fresh_symbolic() + if bb not in self.bb_available_vars: + self.bb_available_vars[bb] = [] + + # During generation phase, always return symbolic variables + # They will be resolved to appropriate real variables later based on availability + return self.fresh_symbolic() def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" - available_in_bb = self.bb_available_vars.get(bb, []) - if available_in_bb and draw(st.booleans()): - return draw(st.sampled_from(available_in_bb)) + # For now, only return literals to avoid cross-block availability issues + # TODO: Once we have proper availability tracking, we can use variables again if self.allocated_memory_slots and draw(st.booleans()): # bias towards addresses near existing allocations to create aliasing opportunities @@ -467,21 +627,32 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: bb.append_instruction("jmp", bb_type.target.label) elif isinstance(bb_type, _BranchBB): + # Ensure we have available vars tracked for this block + if bb not in fuzzer.bb_available_vars: + fuzzer.bb_available_vars[bb] = [] + cond_var = fuzzer.get_random_variable(draw, bb) # get bottom bit, for bias reasons cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) + fuzzer.bb_available_vars[bb].append(cond_var) if bb_type.needs_loop_counter: loop_counter_addr = IRLiteral(bb_type.counter_addr) counter = bb.append_instruction("mload", loop_counter_addr) + fuzzer.bb_available_vars[bb].append(counter) + incr_counter = bb.append_instruction("add", counter, IRLiteral(1)) + fuzzer.bb_available_vars[bb].append(incr_counter) + bb.append_instruction("mstore", incr_counter, loop_counter_addr) max_iterations = IRLiteral(MAX_LOOP_ITERATIONS) counter_ok = bb.append_instruction("lt", counter, max_iterations) + fuzzer.bb_available_vars[bb].append(counter_ok) cond_var = bb.append_instruction("and", counter_ok, cond_var) + fuzzer.bb_available_vars[bb].append(cond_var) # when there is a back edge, target2 is always the forward edge bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label) @@ -489,8 +660,11 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: else: raise Exception() # unreachable + # propagate available variables through the CFG + fuzzer.propagate_available_vars(cfg) + # resolve all symbolic variables to real ones - fuzzer.resolve_all_variables() + fuzzer.resolve_all_variables(cfg) fuzzer.ensure_all_vars_have_values() @@ -509,12 +683,11 @@ def __init__(self, passes: list[type]): def compile_to_bytecode(self, ctx: IRContext) -> bytes: """Compile Venom IR context to EVM bytecode.""" - # assumes MakeSSA has already been run for fn in ctx.functions.values(): ac = IRAnalysesCache(fn) SimplifyCFGPass(ac, fn).run_pass() - SingleUseExpansion(ac, fn).run_pass() MakeSSA(ac, fn).run_pass() + SingleUseExpansion(ac, fn).run_pass() CFGNormalization(ac, fn).run_pass() fn.freshen_varnames() @@ -616,7 +789,7 @@ def venom_with_calldata(draw): hp.Phase.target, # Phase.shrink, # can force long waiting for examples ), - verbosity=hp.Verbosity.debug, + # verbosity=hp.Verbosity.debug, ) def test_memory_passes_fuzzing(venom_data, env): """ From 6c08d3b942e211292a01d53b911084d83c8cdc33 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Jun 2025 21:54:58 +0200 Subject: [PATCH 22/24] fix[venom]: fix last_variable in function copy --- vyper/venom/function.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vyper/venom/function.py b/vyper/venom/function.py index 61a3e74111..f6ff704979 100644 --- a/vyper/venom/function.py +++ b/vyper/venom/function.py @@ -162,6 +162,9 @@ def error_msg(self) -> Optional[str]: def copy(self): new = IRFunction(self.name) + + new.last_variable = self.last_variable + new.clear_basic_blocks() for bb in self.get_basic_blocks(): new_bb = bb.copy() From 351c58e139a32271f6da874baa80bbf390da4a9f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Jun 2025 17:29:12 +0200 Subject: [PATCH 23/24] wip - rework variable allocation --- tests/functional/venom/test_memory_fuzzer.py | 257 +++---------------- 1 file changed, 41 insertions(+), 216 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 72cc52dd66..95bd61397c 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -101,230 +101,67 @@ def __init__(self): self.function = None self.bb_counter = 0 self.calldata_offset = MAX_MEMORY_SIZE - self.available_vars = [] self.allocated_memory_slots = set() - # track which variables are available in each block - self.bb_available_vars = {} - # variables defined in entry block (available everywhere) - self.entry_vars = set() # symbolic variable tracking self.symbolic_counter = 0 def get_next_variable(self) -> IRVariable: """Generate a new unique variable using the function's allocator.""" assert self.function is not None, "Function must be set before allocating variables" - var = self.function.get_next_variable() - self.available_vars.append(var) - return var + return self.function.get_next_variable() def fresh_symbolic(self) -> SymbolicVar: """Create a new symbolic variable""" self.symbolic_counter += 1 return SymbolicVar(f"%sym_{self.symbolic_counter}") - def compute_reachable_blocks( - self, cfg: dict[IRBasicBlock, _BBType] - ) -> dict[IRBasicBlock, set[IRBasicBlock]]: - """Compute which blocks are reachable from each block.""" - reachable_from = {} - - def get_successors(bb: IRBasicBlock) -> list[IRBasicBlock]: - """Get successor blocks based on CFG.""" - bb_type = cfg.get(bb) - if isinstance(bb_type, _JumpBB): - return [bb_type.target] - elif isinstance(bb_type, _BranchBB): - return [bb_type.target1, bb_type.target2] - else: - return [] - - def compute_reachable( - block: IRBasicBlock, visited: set[IRBasicBlock] = None - ) -> set[IRBasicBlock]: - if visited is None: - visited = set() - if block in visited: - return set() - visited.add(block) - - result = {block} - for succ in get_successors(block): - result.update(compute_reachable(succ, visited)) - return result - - for bb in self.function.get_basic_blocks(): - reachable_from[bb] = compute_reachable(bb) - - return reachable_from - - def compute_variable_availability( - self, cfg: dict[IRBasicBlock, _BBType] - ) -> dict[IRBasicBlock, set[IRVariable]]: - """Compute which variables are available at each block after resolution.""" - # First, compute reachability - reachable_from = self.compute_reachable_blocks(cfg) - - # Find where each variable is defined - var_defs = {} # var -> defining block - for bb in self.function.get_basic_blocks(): - for inst in bb.instructions: - if inst.output and isinstance(inst.output, IRVariable): - var_defs[inst.output] = bb - - # Compute availability - available_at = {} # block -> set of available vars - for bb in self.function.get_basic_blocks(): - available_at[bb] = set() - - # Variables are available in their defining block and all reachable blocks - for var, def_block in var_defs.items(): - for bb in reachable_from[def_block]: - available_at[bb].add(var) - - # Entry block variables are available everywhere - entry_bb = self.function.entry - for inst in entry_bb.instructions: - if inst.output and isinstance(inst.output, IRVariable): - for bb in self.function.get_basic_blocks(): - available_at[bb].add(inst.output) - - return available_at - - def propagate_available_vars(self, cfg: dict[IRBasicBlock, _BBType]) -> None: - """Update bb_available_vars to include variables from predecessors.""" - # Build predecessor map - predecessors = defaultdict(list) - for bb, bb_type in cfg.items(): - if isinstance(bb_type, _JumpBB): - predecessors[bb_type.target].append(bb) - elif isinstance(bb_type, _BranchBB): - predecessors[bb_type.target1].append(bb) - predecessors[bb_type.target2].append(bb) - - # Initialize with local definitions - for bb in self.function.get_basic_blocks(): - if bb not in self.bb_available_vars: - self.bb_available_vars[bb] = [] - - # Add entry block variables to all blocks - entry_vars = self.bb_available_vars.get(self.function.entry, []) - for bb in self.function.get_basic_blocks(): - if bb != self.function.entry: - # Add entry vars that aren't already there - for var in entry_vars: - if var not in self.bb_available_vars[bb]: - self.bb_available_vars[bb].append(var) - - # Fixed-point iteration to propagate variables - changed = True - while changed: - changed = False - for bb in self.function.get_basic_blocks(): - if bb == self.function.entry: - continue - - # Variables available at block entry = intersection of predecessor outputs - if bb in predecessors and predecessors[bb]: - # Start with variables from first predecessor - available = set(self.bb_available_vars.get(predecessors[bb][0], [])) - - # Intersect with other predecessors - for pred in predecessors[bb][1:]: - available &= set(self.bb_available_vars.get(pred, [])) - - # Add available vars that aren't already tracked - for var in available: - if var not in self.bb_available_vars[bb]: - self.bb_available_vars[bb].append(var) - changed = True - def resolve_all_variables(self, cfg: dict[IRBasicBlock, _BBType]): """After building all blocks, replace symbolic vars with real ones""" - # Compute which variables are available at each block - available_at = self.compute_variable_availability(cfg) - - # Track which real variable each symbolic var maps to globally - # We need a global mapping because symbolic vars can be used across blocks + # Simple global mapping - each symbolic var gets one real var symbolic_mapping = {} - # Track variables we've allocated but not yet assigned - unassigned_vars = set() - - # First pass: resolve all output variables - for bb in self.function.get_basic_blocks(): - for inst in bb.instructions: - if inst.output and isinstance(inst.output, SymbolicVar): - # Create a new variable for outputs - symbolic_var = inst.output - real_var = self.get_next_variable() - inst.output = real_var - symbolic_mapping[symbolic_var] = real_var - # This variable is now available in this block and all reachable blocks - - # Second pass: resolve all operand variables based on what's available for bb in self.function.get_basic_blocks(): - available_vars = list(available_at.get(bb, set())) - - # Collect instructions that need calldataloads inserted before them - insertions = [] # List of (index, instruction) pairs + insertions = [] for i, inst in enumerate(bb.instructions): + # First, handle output to allocate variable if needed + output_sym = None + if inst.output and isinstance(inst.output, SymbolicVar): + output_sym = inst.output + if inst.output not in symbolic_mapping: + symbolic_mapping[inst.output] = self.get_next_variable() + inst.output = symbolic_mapping[inst.output] + + # Then resolve operands new_operands = [] for op in inst.operands: if isinstance(op, SymbolicVar): - # Check if we already resolved this symbolic var - if op in symbolic_mapping: - real_var = symbolic_mapping[op] - else: - # This symbolic var hasn't been resolved yet - # It must be used before being defined as an output - # Create a fresh variable and initialize it + if op not in symbolic_mapping: + # First use - create variable and schedule initialization real_var = self.get_next_variable() symbolic_mapping[op] = real_var - - # Schedule calldataload insertion before this instruction load_inst = IRInstruction( "calldataload", [IRLiteral(self.calldata_offset)], real_var ) insertions.append((i, load_inst)) self.calldata_offset += 32 - - op = real_var + op = symbolic_mapping[op] new_operands.append(op) inst.operands = new_operands - # Insert calldataloads in reverse order to preserve indices + # Insert calldataloads for idx, load_inst in reversed(insertions): bb.insert_instruction(load_inst, index=idx) - def ensure_all_vars_have_values(self) -> None: - """Ensure all available variables have values by using calldataload for unassigned ones.""" - assigned_vars = set() - for bb in self.function.get_basic_blocks(): - for inst in bb.instructions: - if inst.output: - assigned_vars.add(inst.output) - - entry_bb = self.function.entry - unassigned_vars = [var for var in self.available_vars if var not in assigned_vars] - - for i, var in enumerate(unassigned_vars): - inst = IRInstruction("calldataload", [IRLiteral(self.calldata_offset)], var) - entry_bb.insert_instruction(inst, index=i) - self.calldata_offset += 32 - def get_next_bb_label(self) -> IRLabel: """Generate a new unique basic block label.""" self.bb_counter += 1 return IRLabel(f"bb{self.bb_counter}") - def get_random_variable(self, draw, bb: IRBasicBlock) -> IRVariable | SymbolicVar: - """Get a random available variable or create a new one.""" - if bb not in self.bb_available_vars: - self.bb_available_vars[bb] = [] - - # During generation phase, always return symbolic variables - # They will be resolved to appropriate real variables later based on availability + def get_random_variable(self, draw, bb: IRBasicBlock) -> SymbolicVar: + """Get a symbolic variable that will be resolved later.""" + # Always return symbolic variables during generation phase + # They will be resolved to real variables with proper initialization return self.fresh_symbolic() def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral: @@ -369,22 +206,15 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: """Generate and append a memory instruction to current basic block.""" op = draw(st.sampled_from(MEMORY_OPS)) - # track variables defined so far in this block - if bb not in fuzzer.bb_available_vars: - fuzzer.bb_available_vars[bb] = [] - if op == "mload": addr = fuzzer.get_memory_address(draw, bb) result_var = fuzzer.fresh_symbolic() bb.append_instruction("mload", addr, ret=result_var) - # add to variables available in this block - fuzzer.bb_available_vars[bb].append(result_var) elif op == "mstore": - # can use variables defined earlier in this block - available_in_bb = fuzzer.bb_available_vars.get(bb, []) - if available_in_bb and draw(st.booleans()): - value = draw(st.sampled_from(available_in_bb)) + # Use either a symbolic variable or a literal + if draw(st.booleans()): + value = fuzzer.get_random_variable(draw, bb) else: value = IRLiteral(draw(st.integers(min_value=0, max_value=2**256 - 1))) addr = fuzzer.get_memory_address(draw, bb) @@ -538,10 +368,14 @@ def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: # unreachable raise Exception(f"Unknown precompile: {precompile_name}") - gas = bb.append_instruction("gas") + gas = fuzzer.fresh_symbolic() + bb.append_instruction("gas", ret=gas) addr = IRLiteral(precompile_addr) - bb.append_instruction("staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size) + success = fuzzer.fresh_symbolic() + bb.append_instruction( + "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size, ret=success + ) @st.composite @@ -627,47 +461,38 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: bb.append_instruction("jmp", bb_type.target.label) elif isinstance(bb_type, _BranchBB): - # Ensure we have available vars tracked for this block - if bb not in fuzzer.bb_available_vars: - fuzzer.bb_available_vars[bb] = [] - cond_var = fuzzer.get_random_variable(draw, bb) # get bottom bit, for bias reasons - cond_var = bb.append_instruction("and", cond_var, IRLiteral(1)) - fuzzer.bb_available_vars[bb].append(cond_var) + cond_result = fuzzer.fresh_symbolic() + bb.append_instruction("and", cond_var, IRLiteral(1), ret=cond_result) if bb_type.needs_loop_counter: loop_counter_addr = IRLiteral(bb_type.counter_addr) - counter = bb.append_instruction("mload", loop_counter_addr) - fuzzer.bb_available_vars[bb].append(counter) - - incr_counter = bb.append_instruction("add", counter, IRLiteral(1)) - fuzzer.bb_available_vars[bb].append(incr_counter) + counter = fuzzer.fresh_symbolic() + bb.append_instruction("mload", loop_counter_addr, ret=counter) + incr_counter = fuzzer.fresh_symbolic() + bb.append_instruction("add", counter, IRLiteral(1), ret=incr_counter) bb.append_instruction("mstore", incr_counter, loop_counter_addr) max_iterations = IRLiteral(MAX_LOOP_ITERATIONS) - counter_ok = bb.append_instruction("lt", counter, max_iterations) - fuzzer.bb_available_vars[bb].append(counter_ok) + counter_ok = fuzzer.fresh_symbolic() + bb.append_instruction("lt", counter, max_iterations, ret=counter_ok) - cond_var = bb.append_instruction("and", counter_ok, cond_var) - fuzzer.bb_available_vars[bb].append(cond_var) + final_cond = fuzzer.fresh_symbolic() + bb.append_instruction("and", counter_ok, cond_result, ret=final_cond) + cond_result = final_cond # when there is a back edge, target2 is always the forward edge - bb.append_instruction("jnz", cond_var, bb_type.target1.label, bb_type.target2.label) + bb.append_instruction("jnz", cond_result, bb_type.target1.label, bb_type.target2.label) else: raise Exception() # unreachable - # propagate available variables through the CFG - fuzzer.propagate_available_vars(cfg) - # resolve all symbolic variables to real ones fuzzer.resolve_all_variables(cfg) - fuzzer.ensure_all_vars_have_values() - # freshen variable names for easier debugging for fn in fuzzer.ctx.functions.values(): fn.freshen_varnames() From fd322dea23a6333a394a3d6732518a3455b985ac Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 26 Jun 2025 23:57:16 +0200 Subject: [PATCH 24/24] some polishing --- tests/functional/venom/test_memory_fuzzer.py | 217 ++++++++++++------- 1 file changed, 144 insertions(+), 73 deletions(-) diff --git a/tests/functional/venom/test_memory_fuzzer.py b/tests/functional/venom/test_memory_fuzzer.py index 95bd61397c..fba2ae4cfb 100644 --- a/tests/functional/venom/test_memory_fuzzer.py +++ b/tests/functional/venom/test_memory_fuzzer.py @@ -4,8 +4,14 @@ This fuzzer generates complex control flow with memory instructions to test memory optimization passes. It uses the IRBasicBlock API directly and can be plugged with any Venom passes. + +The fuzzer works in two phases: +1. Generation Phase: Creates IR with symbolic variables that can be used before definition +2. Resolution Phase: Replaces symbolic variables with real variables and inserts initialization + +This two-phase approach enables complex cross-block dataflow patterns that would be +difficult to generate with a single pass. """ -from collections import defaultdict from dataclasses import dataclass from typing import Optional @@ -14,6 +20,7 @@ import pytest from tests.evm_backends.base_env import EvmError +from tests.venom_utils import assert_ctx_eq from vyper.ir.compile_ir import assembly_to_evm from vyper.venom import VenomCompiler from vyper.venom.analysis import IRAnalysesCache @@ -31,6 +38,10 @@ SingleUseExpansion, ) +# ============================================================================ +# Constants +# ============================================================================ + MEMORY_OPS = ["mload", "mstore", "mcopy"] # precompiles act as fence operations that generate real output data, @@ -53,6 +64,11 @@ MAX_LOOP_ITERATIONS = 12 +# ============================================================================ +# Basic Block Types +# ============================================================================ + + @dataclass class _BBType: """Base class for basic block types in the CFG.""" @@ -76,7 +92,11 @@ class _JumpBB(_BBType): @dataclass class _BranchBB(_BBType): - """Basic block with conditional branch.""" + """Basic block with conditional branch. + + Convention: If there's a back edge, target1 is the back edge and + target2 is the forward edge. This ensures consistent loop structure. + """ target1: IRBasicBlock target2: IRBasicBlock @@ -87,22 +107,44 @@ def needs_loop_counter(self) -> bool: return self.counter_addr is not None +# ============================================================================ +# Symbolic Variables +# ============================================================================ + + class SymbolicVar(IRVariable): - """Placeholder for a variable that will be resolved later""" + """Placeholder for a variable that will be resolved later. + + Symbolic variables enable cross-block dataflow patterns by allowing + uses before definitions. During the resolution phase, each symbolic + variable is replaced with a real variable and initialized via calldataload + if it's used before being defined. + """ pass +# ============================================================================ +# Memory Fuzzer +# ============================================================================ + + class MemoryFuzzer: - """Generates random Venom IR with memory operations using IRBasicBlock API.""" + """Generates random Venom IR with memory operations using IRBasicBlock API. + + This fuzzer creates complex control flow patterns with memory operations + to stress-test memory optimization passes. It works in two phases: + + 1. Generation: Build IR with symbolic variables, allowing flexible dataflow + 2. Resolution: Replace symbolic variables with real ones and add initialization + """ def __init__(self): self.ctx = IRContext() self.function = None self.bb_counter = 0 - self.calldata_offset = MAX_MEMORY_SIZE + self.calldata_offset = MAX_MEMORY_SIZE # Start after memory seed data self.allocated_memory_slots = set() - # symbolic variable tracking self.symbolic_counter = 0 def get_next_variable(self) -> IRVariable: @@ -115,7 +157,7 @@ def fresh_symbolic(self) -> SymbolicVar: self.symbolic_counter += 1 return SymbolicVar(f"%sym_{self.symbolic_counter}") - def resolve_all_variables(self, cfg: dict[IRBasicBlock, _BBType]): + def resolve_all_variables(self, block_types: dict[IRBasicBlock, _BBType]): """After building all blocks, replace symbolic vars with real ones""" # Simple global mapping - each symbolic var gets one real var symbolic_mapping = {} @@ -166,8 +208,7 @@ def get_random_variable(self, draw, bb: IRBasicBlock) -> SymbolicVar: def get_memory_address(self, draw, bb: IRBasicBlock) -> IRVariable | IRLiteral: """Get a memory address, biased towards interesting optimizer-relevant locations.""" - # For now, only return literals to avoid cross-block availability issues - # TODO: Once we have proper availability tracking, we can use variables again + # Currently only returns literals to keep fuzzing patterns simple if self.allocated_memory_slots and draw(st.booleans()): # bias towards addresses near existing allocations to create aliasing opportunities @@ -227,7 +268,12 @@ def memory_instruction(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: bb.append_instruction("mcopy", dest, src, IRLiteral(length)) else: - raise Exception("unreachable") + raise AssertionError(f"Unknown memory operation: {op}") + + +# ============================================================================ +# Control Flow Generation +# ============================================================================ @st.composite @@ -239,10 +285,10 @@ def control_flow_graph(draw, basic_blocks): 3. Proper use of jump and branch instructions 4. No back edges to entry block """ - cfg: dict[IRBasicBlock, _BBType] = {} + block_types: dict[IRBasicBlock, _BBType] = {} # last block is always a return block - guarantees all other blocks have forward targets - cfg[basic_blocks[-1]] = _ReturnBB() + block_types[basic_blocks[-1]] = _ReturnBB() # cache forward targets for each block for performance forward_targets = {} @@ -260,43 +306,42 @@ def control_flow_graph(draw, basic_blocks): source = draw(st.sampled_from(reachable_blocks)) # we have already visited it - if source in cfg: + if source in block_types: continue target = draw(st.sampled_from(remaining_blocks)) - # target is now reachable, but it may not be in cfg yet + # target is now reachable, but it may not be in block_types yet reachable_blocks.append(target) remaining_blocks.remove(target) if draw(st.booleans()): - cfg[source] = _JumpBB(target=target) + block_types[source] = _JumpBB(target=target) else: # For branches, allow any block as the other target except entry # (target is already guaranteed to be forward) other_target = draw(st.sampled_from(non_entry_blocks)) is_back_edge = basic_blocks.index(other_target) <= basic_blocks.index(source) - # counter_addr = loop_counter_addr if is_back_edge else None # if other_target is the back edge, swap so back edge is always target1 if is_back_edge: other_target, target = target, other_target - cfg[source] = _BranchBB(target1=target, target2=other_target) + block_types[source] = _BranchBB(target1=target, target2=other_target) # classify remaining blocks that were not handled during spanning # tree construction. loop_counter_addr = MAX_MEMORY_SIZE for bb in basic_blocks: - if bb in cfg: + if bb in block_types: continue edge_type = draw(st.sampled_from(["jump", "branch"])) if edge_type == "jump": target = draw(st.sampled_from(forward_targets[bb])) - cfg[bb] = _JumpBB(target=target) + block_types[bb] = _JumpBB(target=target) else: # branch # Choose targets, but never allow entry as a target target1 = draw(st.sampled_from(non_entry_blocks)) @@ -318,12 +363,12 @@ def control_flow_graph(draw, basic_blocks): counter_addr = loop_counter_addr if contains_back_edge else None - cfg[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr) + block_types[bb] = _BranchBB(target1=target1, target2=target2, counter_addr=counter_addr) if contains_back_edge: loop_counter_addr += 32 - return cfg + return block_types @st.composite @@ -365,16 +410,21 @@ def precompile_call(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> None: input_size = IRLiteral(213) # blake2f requires specific input size output_size = IRLiteral(64) else: - # unreachable - raise Exception(f"Unknown precompile: {precompile_name}") + raise AssertionError(f"Unknown precompile: {precompile_name}") gas = fuzzer.fresh_symbolic() bb.append_instruction("gas", ret=gas) addr = IRLiteral(precompile_addr) success = fuzzer.fresh_symbolic() - bb.append_instruction( - "staticcall", gas, addr, input_ofst, input_size, output_ofst, output_size, ret=success + bb.append_instruction( "staticcall", + output_size, + output_ofst, + input_size, + input_ofst, + addr, + gas, + ret=success ) @@ -391,7 +441,12 @@ def basic_block_instructions(draw, fuzzer: MemoryFuzzer, bb: IRBasicBlock) -> No elif inst_type == "precompile": draw(precompile_call(fuzzer, bb)) else: - raise Exception("unreachable") + raise AssertionError(f"Unknown instruction type: {inst_type}") + + +# ============================================================================ +# Main Generation Function +# ============================================================================ @st.composite @@ -405,11 +460,13 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: """ fuzzer = MemoryFuzzer() + # ---- Setup function and context ---- func_name = IRLabel("_fuzz_function", is_symbol=True) fuzzer.function = IRFunction(func_name, fuzzer.ctx) fuzzer.ctx.functions[func_name] = fuzzer.function fuzzer.ctx.entry_function = fuzzer.function + # ---- Generate basic blocks ---- num_blocks = draw(st.integers(min_value=1, max_value=MAX_BASIC_BLOCKS)) basic_blocks = [] @@ -428,8 +485,10 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: assert fuzzer.function.entry is basic_blocks[0] - cfg = draw(control_flow_graph(basic_blocks)) + # ---- Generate control flow ---- + block_types = draw(control_flow_graph(basic_blocks)) + # ---- Initialize memory and loop counters ---- entry_block = basic_blocks[0] entry_block.append_instruction( "calldatacopy", IRLiteral(0), IRLiteral(0), IRLiteral(MAX_MEMORY_SIZE) @@ -437,7 +496,7 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: # extract loop counter addresses and initialize them counter_addrs = set() - for bb_type in cfg.values(): + for bb_type in block_types.values(): if isinstance(bb_type, _BranchBB) and bb_type.counter_addr is not None: addr = bb_type.counter_addr assert addr not in counter_addrs, f"Duplicate counter address {addr}" @@ -446,13 +505,13 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: for addr in counter_addrs: entry_block.append_instruction("mstore", IRLiteral(0), IRLiteral(addr)) - # generate instructions for each block + # ---- Generate instructions ---- for bb in basic_blocks: draw(basic_block_instructions(fuzzer, bb)) - # add terminators + # ---- Add terminators ---- for bb in basic_blocks: - bb_type = cfg[bb] + bb_type = block_types[bb] if isinstance(bb_type, _ReturnBB): bb.append_instruction("return", IRLiteral(MAX_MEMORY_SIZE), IRLiteral(0)) @@ -488,10 +547,10 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: bb.append_instruction("jnz", cond_result, bb_type.target1.label, bb_type.target2.label) else: - raise Exception() # unreachable + raise AssertionError(f"Unknown basic block type: {type(bb_type)}") - # resolve all symbolic variables to real ones - fuzzer.resolve_all_variables(cfg) + # ---- Phase 2: Resolve symbolic variables ---- + fuzzer.resolve_all_variables(block_types) # freshen variable names for easier debugging for fn in fuzzer.ctx.functions.values(): @@ -500,6 +559,11 @@ def venom_function_with_memory_ops(draw) -> tuple[IRContext, int]: return fuzzer.ctx, fuzzer.calldata_offset +# ============================================================================ +# Memory Pass Checker +# ============================================================================ + + class MemoryFuzzChecker: """A pluggable checker for memory passes using fuzzing.""" @@ -514,9 +578,8 @@ def compile_to_bytecode(self, ctx: IRContext) -> bytes: MakeSSA(ac, fn).run_pass() SingleUseExpansion(ac, fn).run_pass() CFGNormalization(ac, fn).run_pass() - fn.freshen_varnames() - hp.note(str(ctx)) + # hp.note(str(ctx)) compiler = VenomCompiler([ctx]) asm = compiler.generate_evm() @@ -554,8 +617,9 @@ def execute_bytecode(self, bytecode: bytes, calldata: bytes, env) -> tuple[bool, try: result = env.message_call(to=deployed_address, data=calldata) return True, result - except EvmError: - return False, b"" + except EvmError as e: + # stub for future handling of programs that are actually expected to revert + raise def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: """Check equivalence between unoptimized and optimized execution.""" @@ -564,21 +628,49 @@ def check_equivalence(self, ctx: IRContext, calldata: bytes, env) -> None: ac = IRAnalysesCache(fn) MakeSSA(ac, fn).run_pass() AssignElimination(ac, fn).run_pass() - hp.note("UNOPTIMIZED: " + str(ctx)) + fn.freshen_varnames() opt_ctx = self.run_passes(ctx) + for fn in opt_ctx.functions.values(): + fn.freshen_varnames() + + try: + assert_ctx_eq(ctx, opt_ctx) + except AssertionError as e: + equals = False + msg = e.args[0] + else: + equals = True + + if equals: + hp.note("No optimization done") + return + hp.note("UNOPTIMIZED: " + str(ctx)) hp.note("OPTIMIZED: " + str(opt_ctx)) + hp.note("optimizations: " + str(msg)) bytecode1 = self.compile_to_bytecode(ctx) bytecode2 = self.compile_to_bytecode(opt_ctx) + hp.note(f"MSG CALL {calldata.hex()}") + succ1, out1 = self.execute_bytecode(bytecode1, calldata, env) succ2, out2 = self.execute_bytecode(bytecode2, calldata, env) + if not succ1 or not succ2: + hp.note("reverted") + else: + hp.note(f"OUT {out1.hex()}") + assert succ1 == succ2, (succ1, out1, succ2, out2) assert out1 == out2, (succ1, out1, succ2, out2) +# ============================================================================ +# Test Helpers +# ============================================================================ + + @st.composite def venom_with_calldata(draw): """Generate Venom IR context with matching calldata.""" @@ -587,34 +679,19 @@ def venom_with_calldata(draw): return ctx, calldata -# Test with memory-related passes +# ============================================================================ +# Property-Based Tests +# ============================================================================ + + @pytest.mark.fuzzing -# @pytest.mark.parametrize( -# "pass_list", -# [ -# # Test individual memory passes -# [MemMergePass], -# [LoadElimination], -# [DeadStoreElimination], -# # Test combinations -# [LoadElimination, DeadStoreElimination], -# [DeadStoreElimination, LoadElimination], -# [LoadElimination, MemMergePass], -# ], -# ) @hp.given(venom_data=venom_with_calldata()) @hp.settings( - max_examples=50, + max_examples=1000, suppress_health_check=(hp.HealthCheck.data_too_large, hp.HealthCheck.too_slow), deadline=None, - phases=( - hp.Phase.explicit, - hp.Phase.reuse, - hp.Phase.generate, - hp.Phase.target, - # Phase.shrink, # can force long waiting for examples - ), - # verbosity=hp.Verbosity.debug, + phases=(hp.Phase.explicit, hp.Phase.reuse, hp.Phase.generate, hp.Phase.target), + verbosity=hp.Verbosity.verbose, ) def test_memory_passes_fuzzing(venom_data, env): """ @@ -625,17 +702,15 @@ def test_memory_passes_fuzzing(venom_data, env): pass_list = [MemMergePass] ctx, calldata = venom_data - hp.note(f"Testing passes: {[p.__name__ for p in pass_list]}") - - func = list(ctx.functions.values())[0] - hp.note(f"Generated function with {func.num_basic_blocks} basic blocks") - hp.note(f"Calldata size: {len(calldata)} bytes") - hp.note(str(ctx)) - checker = MemoryFuzzChecker(pass_list) checker.check_equivalence(ctx, calldata, env) +# ============================================================================ +# Manual Testing +# ============================================================================ + + def generate_sample_ir() -> IRContext: """Generate a sample IR for manual inspection.""" ctx, _ = venom_function_with_memory_ops().example() @@ -644,10 +719,6 @@ def generate_sample_ir() -> IRContext: if __name__ == "__main__": ctx = generate_sample_ir() - - # func = list(ctx.functions.values())[0] - # print(func) - checker = MemoryFuzzChecker([MemMergePass]) checker.run_passes(ctx) print(ctx)